/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

        .text
        .arch   armv8-a+crypto

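        /*
         * Load the round keys of a \nr round key schedule into v10-v21 and
         * v3-v5.  The last three round keys always land in v3-v5, so v5
         * holds the final AddRoundKey value, which the callers apply
         * separately.  For 10 or 12 rounds the tail of v10-v13 merely
         * duplicates the start of v14-v17; aes_encrypt below skips those
         * slots.
         */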
        .macro  load_round_keys, rk, nr, tmp
        sub     w\tmp, \nr, #10
        add     \tmp, \rk, w\tmp, sxtw #4       /* rk + (nr - 10) * 16 */
        ld1     {v10.4s-v13.4s}, [\rk]
        ld1     {v14.4s-v17.4s}, [\tmp], #64
        ld1     {v18.4s-v21.4s}, [\tmp], #64
        ld1     {v3.4s-v5.4s}, [\tmp]           /* last three round keys */
        .endm

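        /*
         * Perform one full AES round (AESE = AddRoundKey + SubBytes +
         * ShiftRows, followed by AESMC = MixColumns) on two blocks in
         * parallel, so the MAC state and the CTR keystream block share the
         * round key traversal.
         */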
        .macro  dround, va, vb, vk
        aese    \va\().16b, \vk\().16b
        aesmc   \va\().16b, \va\().16b
        aese    \vb\().16b, \vk\().16b
        aesmc   \vb\().16b, \vb\().16b
        .endm

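        /*
         * Encrypt two 16-byte blocks (\va and \vb) using the round keys set
         * up by load_round_keys.  Bits #2 and #1 of \nr (10, 12 or 14)
         * decide how many of the leading v10-v13 rounds to run, so one code
         * path covers AES-128/192/256.  The final AddRoundKey with v5 is
         * deliberately left out; the callers fold it in with an EOR.
         */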
        .macro  aes_encrypt, va, vb, nr
        tbz     \nr, #2, .L\@
        dround  \va, \vb, v10
        dround  \va, \vb, v11
        tbz     \nr, #1, .L\@
        dround  \va, \vb, v12
        dround  \va, \vb, v13
.L\@:   .irp    v, v14, v15, v16, v17, v18, v19, v20, v21, v3
        dround  \va, \vb, \v
        .endr
        aese    \va\().16b, v4.16b
        aese    \vb\().16b, v4.16b
        .endm

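        /*
         * CCM en/decrypt core.  Register usage, matching the C prototypes
         * further down:
         *
         *   x0: out       x1: in       w2: byte count   x3: round keys
         *   w4: # rounds  x5: mac      x6: ctr block    x7: final iv
         *
         *   v0: mac state         v1: ctr/keystream block   v2: input block
         *   v5: last round key    v6: output block
         *   v22: mask used by the tail code to select the plaintext
         */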
        .macro  aes_ccm_do_crypt,enc
        load_round_keys x3, w4, x10

        ld1     {v0.16b}, [x5]                  /* load mac */
        cbz     x2, ce_aes_ccm_final
        ldr     x8, [x6, #8]                    /* load lower ctr */
CPU_LE( rev     x8, x8          )               /* keep swabbed ctr in reg */
0:      /* outer loop */
        ld1     {v1.8b}, [x6]                   /* load upper ctr */
        prfm    pldl1strm, [x1]                 /* prefetch next input block */
        add     x8, x8, #1                      /* increment lower ctr */
        rev     x9, x8
        ins     v1.d[1], x9                     /* no carry in lower ctr */

        aes_encrypt     v0, v1, w4

        subs    w2, w2, #16
        bmi     ce_aes_ccm_crypt_tail           /* fewer than 16 bytes left */
        ld1     {v2.16b}, [x1], #16             /* load next input block */
        .if     \enc == 1
        eor     v2.16b, v2.16b, v5.16b          /* final round enc+mac */
        eor     v6.16b, v1.16b, v2.16b          /* xor with crypted ctr */
        .else
        eor     v2.16b, v2.16b, v1.16b          /* xor with crypted ctr */
        eor     v6.16b, v2.16b, v5.16b          /* final round enc */
        .endif
        eor     v0.16b, v0.16b, v2.16b          /* xor mac with pt ^ rk[last] */
        st1     {v6.16b}, [x0], #16             /* write output block */
        bne     0b
CPU_LE( rev     x8, x8          )
        str     x8, [x6, #8]                    /* store lsb end of ctr (BE) */
        cbnz    x7, ce_aes_ccm_final
        st1     {v0.16b}, [x5]                  /* store mac */
        ret
        .endm

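/*
 * Handle a final partial block.  On entry, w2 is negative: it is the number
 * of remaining bytes minus 16.  Both pointers are rewound so that full
 * 16-byte loads and stores can be used; TBL/TBX with index vectors taken
 * from the .Lpermute table below move the keystream, the plaintext and the
 * previously written output bytes into the right lanes, avoiding a bytewise
 * loop.  The C glue code is expected to guarantee that rewinding by up to
 * 15 bytes stays inside a valid buffer (e.g. by bouncing short inputs
 * through a block-sized buffer).
 */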
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
        eor     v0.16b, v0.16b, v5.16b          /* final round mac */
        eor     v1.16b, v1.16b, v5.16b          /* final round enc */

        add     x1, x1, w2, sxtw                /* rewind the input pointer (w2 < 0) */
        add     x0, x0, w2, sxtw                /* rewind the output pointer */

        adr_l   x8, .Lpermute                   /* load permute vectors */
        add     x9, x8, w2, sxtw                /* permutes for keystream/prev output */
        sub     x8, x8, w2, sxtw                /* permute that left-aligns the plaintext */
        ld1     {v7.16b-v8.16b}, [x9]
        ld1     {v9.16b}, [x8]

        ld1     {v2.16b}, [x1]                  /* load a full block of input */
        tbl     v1.16b, {v1.16b}, v7.16b        /* move keystream to end of register */
        eor     v7.16b, v2.16b, v1.16b          /* encrypt partial input block */
        bif     v2.16b, v7.16b, v22.16b         /* select plaintext */
        tbx     v7.16b, {v6.16b}, v8.16b        /* insert output from previous iteration */
        tbl     v2.16b, {v2.16b}, v9.16b        /* copy plaintext to start of v2 */
        eor     v0.16b, v0.16b, v2.16b          /* fold plaintext into mac */

        st1     {v7.16b}, [x0]                  /* store output block */
        cbz     x7, 0f                          /* no final iv? just store the mac */

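/*
 * Finalize the MAC: encrypt it with the first counter block ('ctriv')
 * passed via final_iv and store the result.  Reached directly when called
 * with a zero byte count, or by falling through from the bulk or tail code
 * above when final_iv is non-NULL.
 */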
SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
        ld1     {v1.16b}, [x7]                  /* load 1st ctriv */

        aes_encrypt     v0, v1, w4

        /* final round key cancels out */
        eor     v0.16b, v0.16b, v1.16b          /* en-/decrypt the mac */
0:      st1     {v0.16b}, [x5]                  /* store result */
        ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)

        /*
         * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
         *                         u8 const rk[], u32 rounds, u8 mac[],
         *                         u8 ctr[], u8 const final_iv[]);
         * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
         *                         u8 const rk[], u32 rounds, u8 mac[],
         *                         u8 ctr[], u8 const final_iv[]);
         */
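        /*
         * cbytes is the number of payload bytes to process, rk/rounds the
         * expanded AES key schedule and its round count (10/12/14), mac the
         * 16-byte CBC-MAC state (updated in place), ctr the 16-byte
         * big-endian counter block (its low 64 bits are written back when
         * the bulk loop completes), and final_iv, when non-NULL, the counter
         * block used to encrypt the MAC on the final call.
         *
         * Rough usage sketch from C, under kernel_neon_begin()/end(); the
         * variable names are illustrative only, and the real caller lives
         * in aes-ce-ccm-glue.c:
         *
         *      kernel_neon_begin();
         *      ce_aes_ccm_encrypt(dst, src, len, ctx->key_enc,
         *                         num_rounds(ctx), mac, ctr_block,
         *                         last_chunk ? ctr0 : NULL);
         *      kernel_neon_end();
         */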
SYM_FUNC_START(ce_aes_ccm_encrypt)
        movi    v22.16b, #255                   /* tail: MAC the input (plaintext) */
        aes_ccm_do_crypt        1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
        movi    v22.16b, #0                     /* tail: MAC the decrypted output */
        aes_ccm_do_crypt        0
SYM_FUNC_END(ce_aes_ccm_decrypt)

        .section ".rodata", "a"
        .align  6
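        /*
         * Sliding permute table: loading 16 bytes at .Lpermute + n (with
         * -15 <= n <= 15) yields a TBL/TBX index vector that shifts a
         * vector up or down by |n| bytes, with the out-of-range lanes
         * flagged by the 0xff filler so that TBL zeroes them and TBX leaves
         * them untouched.
         */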
        .fill   15, 1, 0xff
.Lpermute:
        .byte   0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
        .byte   0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
        .fill   15, 1, 0xff
