1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
4 * as specified in rfc8998
5 * https://datatracker.ietf.org/doc/html/rfc8998
6 *
7 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
8 */
9
10#include <linux/linkage.h>
11#include <linux/cfi_types.h>
12#include <asm/assembler.h>
13#include "sm4-ce-asm.h"
14
.arch	armv8-a+crypto

/*
 * Map the vector register names used below (e.g. "v8.4s") to their
 * register numbers, so the hand-assembled sm4e instruction can splice
 * them into its encoding.  Only the registers this file uses are listed.
 */
.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

/*
 * sm4e <vd>, <vn>: SM4 round encryption from the ARMv8 Crypto
 * Extensions.  Emitted with .inst so the file assembles on toolchains
 * that do not know the SM4 mnemonics; 0xcec08400 is the fixed part of
 * the encoding, with Vn in bits [9:5] and Vd in bits [4:0].
 */
.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

#define RMAC	v16	/* running CBC-MAC state */

/* Helper macros. */

/*
 * inc_le128(vctr): materialize the current 128-bit counter as a
 * big-endian block in vctr, then post-increment the counter.  The
 * counter lives CPU-endian in x7 (high 64 bits) and x8 (low 64 bits);
 * rev64 byte-swaps each 64-bit lane to produce the big-endian block.
 * The increment carries from x8 into x7 (adds/adc).
 */
#define inc_le128(vctr) \
	mov vctr.d[1], x8; \
	mov vctr.d[0], x7; \
	adds x8, x8, #1; \
	rev64 vctr.16b, vctr.16b; \
	adc x7, x7, xzr;
37
38
.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: mac
	 *   x2: src
	 *   w3: nblocks
	 *
	 * Updates the 16-byte CBC-MAC state at [x1] with w3 full blocks
	 * from src.  Each step computes
	 *	mac = SM4(mac) ^ block
	 * i.e. the final block's encryption is left pending in the stored
	 * state; sm4_ce_ccm_final applies that last SM4 call when the tag
	 * is produced.  Assumes w3 >= 1 (the 1x loop decrements before
	 * testing).
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x1]

.Lcbcmac_loop_4x:
	cmp		w3, #4
	blt		.Lcbcmac_loop_1x

	sub		w3, w3, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* four chained mac = SM4(mac) ^ block steps */
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v3.16b

	cbz		w3, .Lcbcmac_end
	b		.Lcbcmac_loop_4x

.Lcbcmac_loop_1x:
	sub		w3, w3, #1

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b

	cbnz		w3, .Lcbcmac_loop_1x

.Lcbcmac_end:
	/* write the (still pending-encryption) MAC state back */
	st1		{RMAC.16b}, [x1]
	ret
SYM_FUNC_END(sm4_ce_cbcmac_update)
85
.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: ctr0 (big endian, 128 bit)
	 *   x2: mac
	 *
	 * Produces the final CCM tag in place at [x2]:
	 *	mac = SM4(mac) ^ SM4(ctr0)
	 * SM4(mac) applies the encryption left pending by the
	 * mac = SM4(mac) ^ block update scheme; XORing with the encrypted
	 * ctr0 block then masks the tag as CCM requires.
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x2]
	ld1		{v0.16b}, [x1]

	/* encrypt MAC state and ctr0 together in one two-block call */
	SM4_CRYPT_BLK2(RMAC, v0)

	/* en-/decrypt the mac with ctr0 */
	eor		RMAC.16b, RMAC.16b, v0.16b
	st1		{RMAC.16b}, [x2]

	ret
SYM_FUNC_END(sm4_ce_ccm_final)
106
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 *
	 * CCM encryption: dst = src ^ SM4-CTR keystream, while the CBC-MAC
	 * state at [x5] absorbs the plaintext (src) using the deferred
	 * mac = SM4(mac) ^ plaintext update.  Keystream block and MAC state
	 * are encrypted pairwise with SM4_CRYPT_BLK2 to keep both SM4
	 * pipelines busy.
	 */
	SM4_PREPARE(x0)

	/* load big-endian counter into CPU-endian x7 (high) / x8 (low) */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_enc_loop_4x:
	cmp		w4, #(4 * 16)
	blt		.Lccm_enc_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* per block: encrypt keystream + MAC, then XOR plaintext into both */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b		/* ciphertext block 0 */
	eor		RMAC.16b, RMAC.16b, v0.16b	/* MAC absorbs plaintext */
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v3.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_4x

.Lccm_enc_loop_1x:
	cmp		w4, #16
	blt		.Lccm_enc_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_1x

.Lccm_enc_tail:
	/* final partial block (1..15 bytes): construct CTRs */
	inc_le128(v8)

	/* encrypt MAC state (pending SM4) and keystream block together */
	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/*
	 * Byte-wise tail: both RMAC and v8 are rotated one byte per
	 * iteration so lane 0 always holds the byte being consumed.
	 * The MAC bytes written here overwrite the state stored above.
	 */
.Lccm_enc_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w0		/* w6 = MAC ^ input */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_enc_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_enc_tail_loop

.Lccm_enc_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR, back in big-endian, for a continued call */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_enc_ret:
	/* tail path skips the CTR write-back: the message has ended */
	ret
SYM_FUNC_END(sm4_ce_ccm_enc)
218
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 *
	 * CCM decryption: dst = src ^ SM4-CTR keystream.  Unlike the
	 * encrypt path, the CBC-MAC state absorbs the recovered PLAINTEXT
	 * (the decrypted output), since CCM authenticates plaintext.
	 */
	SM4_PREPARE(x0)

	/* load big-endian counter into CPU-endian x7 (high) / x8 (low) */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_dec_loop_4x:
	cmp		w4, #(4 * 16)
	blt		.Lccm_dec_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* per block: decrypt to plaintext, then XOR plaintext into MAC */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b		/* plaintext block 0 */
	eor		RMAC.16b, RMAC.16b, v8.16b	/* MAC absorbs plaintext */
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v9.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v10.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v11.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_4x

.Lccm_dec_loop_1x:
	cmp		w4, #16
	blt		.Lccm_dec_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_1x

.Lccm_dec_tail:
	/* final partial block (1..15 bytes): construct CTRs */
	inc_le128(v8)

	/* encrypt MAC state (pending SM4) and keystream block together */
	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/*
	 * Byte-wise tail: RMAC and v8 are rotated one byte per iteration
	 * so lane 0 always holds the byte being consumed.  The MAC
	 * absorbs the recovered plaintext byte (w9), not the input.
	 */
.Lccm_dec_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w9		/* w6 = MAC ^ output */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_dec_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_dec_tail_loop

.Lccm_dec_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR, back in big-endian, for a continued call */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_dec_ret:
	/* tail path skips the CTR write-back: the message has ended */
	ret
SYM_FUNC_END(sm4_ce_ccm_dec)
330

/* source code of linux/arch/arm64/crypto/sm4-ce-ccm-core.S */