1// SPDX-License-Identifier: GPL-2.0
2/*
3 * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
4 *
5 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6 */
7
8#include <asm/hwcap.h>
9#include <asm/neon.h>
10#include <asm/simd.h>
11#include <asm/unaligned.h>
12#include <crypto/algapi.h>
13#include <crypto/internal/hash.h>
14#include <crypto/internal/poly1305.h>
15#include <crypto/internal/simd.h>
16#include <linux/cpufeature.h>
17#include <linux/crypto.h>
18#include <linux/jump_label.h>
19#include <linux/module.h>
20
21asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
22asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
23asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
24asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
25
26static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
27
28void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
29{
30 poly1305_init_arm64(state: &dctx->h, key);
31 dctx->s[0] = get_unaligned_le32(p: key + 16);
32 dctx->s[1] = get_unaligned_le32(p: key + 20);
33 dctx->s[2] = get_unaligned_le32(p: key + 24);
34 dctx->s[3] = get_unaligned_le32(p: key + 28);
35 dctx->buflen = 0;
36}
37EXPORT_SYMBOL(poly1305_init_arch);
38
39static int neon_poly1305_init(struct shash_desc *desc)
40{
41 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
42
43 dctx->buflen = 0;
44 dctx->rset = 0;
45 dctx->sset = false;
46
47 return 0;
48}
49
50static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
51 u32 len, u32 hibit, bool do_neon)
52{
53 if (unlikely(!dctx->sset)) {
54 if (!dctx->rset) {
55 poly1305_init_arm64(state: &dctx->h, key: src);
56 src += POLY1305_BLOCK_SIZE;
57 len -= POLY1305_BLOCK_SIZE;
58 dctx->rset = 1;
59 }
60 if (len >= POLY1305_BLOCK_SIZE) {
61 dctx->s[0] = get_unaligned_le32(p: src + 0);
62 dctx->s[1] = get_unaligned_le32(p: src + 4);
63 dctx->s[2] = get_unaligned_le32(p: src + 8);
64 dctx->s[3] = get_unaligned_le32(p: src + 12);
65 src += POLY1305_BLOCK_SIZE;
66 len -= POLY1305_BLOCK_SIZE;
67 dctx->sset = true;
68 }
69 if (len < POLY1305_BLOCK_SIZE)
70 return;
71 }
72
73 len &= ~(POLY1305_BLOCK_SIZE - 1);
74
75 if (static_branch_likely(&have_neon) && likely(do_neon))
76 poly1305_blocks_neon(state: &dctx->h, src, len, hibit);
77 else
78 poly1305_blocks(state: &dctx->h, src, len, hibit);
79}
80
81static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
82 const u8 *src, u32 len, bool do_neon)
83{
84 if (unlikely(dctx->buflen)) {
85 u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
86
87 memcpy(dctx->buf + dctx->buflen, src, bytes);
88 src += bytes;
89 len -= bytes;
90 dctx->buflen += bytes;
91
92 if (dctx->buflen == POLY1305_BLOCK_SIZE) {
93 neon_poly1305_blocks(dctx, src: dctx->buf,
94 POLY1305_BLOCK_SIZE, hibit: 1, do_neon: false);
95 dctx->buflen = 0;
96 }
97 }
98
99 if (likely(len >= POLY1305_BLOCK_SIZE)) {
100 neon_poly1305_blocks(dctx, src, len, hibit: 1, do_neon);
101 src += round_down(len, POLY1305_BLOCK_SIZE);
102 len %= POLY1305_BLOCK_SIZE;
103 }
104
105 if (unlikely(len)) {
106 dctx->buflen = len;
107 memcpy(dctx->buf, src, len);
108 }
109}
110
111static int neon_poly1305_update(struct shash_desc *desc,
112 const u8 *src, unsigned int srclen)
113{
114 bool do_neon = crypto_simd_usable() && srclen > 128;
115 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
116
117 if (static_branch_likely(&have_neon) && do_neon)
118 kernel_neon_begin();
119 neon_poly1305_do_update(dctx, src, len: srclen, do_neon);
120 if (static_branch_likely(&have_neon) && do_neon)
121 kernel_neon_end();
122 return 0;
123}
124
125void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
126 unsigned int nbytes)
127{
128 if (unlikely(dctx->buflen)) {
129 u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
130
131 memcpy(dctx->buf + dctx->buflen, src, bytes);
132 src += bytes;
133 nbytes -= bytes;
134 dctx->buflen += bytes;
135
136 if (dctx->buflen == POLY1305_BLOCK_SIZE) {
137 poly1305_blocks(state: &dctx->h, src: dctx->buf, POLY1305_BLOCK_SIZE, hibit: 1);
138 dctx->buflen = 0;
139 }
140 }
141
142 if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
143 unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
144
145 if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
146 do {
147 unsigned int todo = min_t(unsigned int, len, SZ_4K);
148
149 kernel_neon_begin();
150 poly1305_blocks_neon(state: &dctx->h, src, len: todo, hibit: 1);
151 kernel_neon_end();
152
153 len -= todo;
154 src += todo;
155 } while (len);
156 } else {
157 poly1305_blocks(state: &dctx->h, src, len, hibit: 1);
158 src += len;
159 }
160 nbytes %= POLY1305_BLOCK_SIZE;
161 }
162
163 if (unlikely(nbytes)) {
164 dctx->buflen = nbytes;
165 memcpy(dctx->buf, src, nbytes);
166 }
167}
168EXPORT_SYMBOL(poly1305_update_arch);
169
170void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
171{
172 if (unlikely(dctx->buflen)) {
173 dctx->buf[dctx->buflen++] = 1;
174 memset(dctx->buf + dctx->buflen, 0,
175 POLY1305_BLOCK_SIZE - dctx->buflen);
176 poly1305_blocks(state: &dctx->h, src: dctx->buf, POLY1305_BLOCK_SIZE, hibit: 0);
177 }
178
179 poly1305_emit(state: &dctx->h, digest: dst, nonce: dctx->s);
180 memzero_explicit(s: dctx, count: sizeof(*dctx));
181}
182EXPORT_SYMBOL(poly1305_final_arch);
183
184static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
185{
186 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
187
188 if (unlikely(!dctx->sset))
189 return -ENOKEY;
190
191 poly1305_final_arch(dctx, dst);
192 return 0;
193}
194
195static struct shash_alg neon_poly1305_alg = {
196 .init = neon_poly1305_init,
197 .update = neon_poly1305_update,
198 .final = neon_poly1305_final,
199 .digestsize = POLY1305_DIGEST_SIZE,
200 .descsize = sizeof(struct poly1305_desc_ctx),
201
202 .base.cra_name = "poly1305",
203 .base.cra_driver_name = "poly1305-neon",
204 .base.cra_priority = 200,
205 .base.cra_blocksize = POLY1305_BLOCK_SIZE,
206 .base.cra_module = THIS_MODULE,
207};
208
209static int __init neon_poly1305_mod_init(void)
210{
211 if (!cpu_have_named_feature(ASIMD))
212 return 0;
213
214 static_branch_enable(&have_neon);
215
216 return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
217 crypto_register_shash(alg: &neon_poly1305_alg) : 0;
218}
219
220static void __exit neon_poly1305_mod_exit(void)
221{
222 if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
223 crypto_unregister_shash(alg: &neon_poly1305_alg);
224}
225
226module_init(neon_poly1305_mod_init);
227module_exit(neon_poly1305_mod_exit);
228
229MODULE_LICENSE("GPL v2");
230MODULE_ALIAS_CRYPTO("poly1305");
231MODULE_ALIAS_CRYPTO("poly1305-neon");
232

/* Source: linux/arch/arm64/crypto/poly1305-glue.c */