1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions |
4 | * |
5 | * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as |
9 | * published by the Free Software Foundation. |
10 | */ |
11 | |
12 | #include <linux/linkage.h> |
13 | #include <asm/assembler.h> |
14 | |
/*
 * Define the assembler-local absolute symbols .LvN.16b and .LvN.2d for
 * N = 0..31, each with the value N.  The instruction macros in this file
 * prepend ".L" to an operand written as "vN.16b" or "vN.2d" to obtain the
 * register number N for the opcode word.
 */
	.irp	regno,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\regno\().16b, \regno
	.set	.Lv\regno\().2d, \regno
	.endr
19 | |
20 | /* |
21 | * ARMv8.2 Crypto Extensions instructions |
22 | */ |
/*
 * eor3 rd, rn, rm, ra - emit EOR3 as a raw opcode word.
 * Architecturally: rd = rn ^ rm ^ ra.
 *
 * Each operand is written "vN.16b" (or "vN.2d"); ".L" is prepended to look
 * up the register number N.  Field positions in the word, high to low:
 * Rm at bit 16, Ra at bit 10, Rn at bit 5, Rd at bit 0.
 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | (.L\rm << 16) | (.L\ra << 10) | (.L\rn << 5) | .L\rd
	.endm
26 | |
/*
 * rax1 rd, rn, rm - emit RAX1 as a raw opcode word.
 * Architecturally, per 64-bit lane: rd = rn ^ rol(rm, 1).
 *
 * Each operand is written "vN.2d" (or "vN.16b"); ".L" is prepended to look
 * up the register number N.  Field positions in the word, high to low:
 * Rm at bit 16, Rn at bit 5, Rd at bit 0.
 */
	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | (.L\rm << 16) | (.L\rn << 5) | .L\rd
	.endm
30 | |
/*
 * bcax rd, rn, rm, ra - emit BCAX as a raw opcode word.
 * Architecturally: rd = rn ^ (rm & ~ra).
 *
 * Each operand is written "vN.16b" (or "vN.2d"); ".L" is prepended to look
 * up the register number N.  Field positions in the word, high to low:
 * Rm at bit 16, Ra at bit 10, Rn at bit 5, Rd at bit 0.
 */
	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | (.L\rm << 16) | (.L\ra << 10) | (.L\rn << 5) | .L\rd
	.endm
34 | |
/*
 * xar rd, rn, rm, imm6 - emit XAR as a raw opcode word.
 * Architecturally, per 64-bit lane: rd = ror(rn ^ rm, imm6).
 *
 * Register operands are written "vN.2d" (or "vN.16b"); ".L" is prepended to
 * look up the register number N.  imm6 is any assembler expression; the
 * macro shifts it into place without range-checking it.  Field positions in
 * the word, high to low: Rm at bit 16, imm6 at bit 10, Rn at bit 5, Rd at
 * bit 0.
 */
	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | (.L\rm << 16) | ((\imm6) << 10) | (.L\rn << 5) | .L\rd
	.endm
38 | |
39 | /* |
40 | * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) |
41 | */ |
42 | .text |
/*
 * Absorb whole input blocks into the SHA-3 state and run the 24-round
 * permutation after each one.
 *
 * Register usage, as established by the code below:
 *   x0      st: 25 64-bit state words, loaded into / stored from the low
 *           64 bits of v0-v24 (one word per register)
 *   x1      data: input pointer, post-incremented as each block is read
 *   w2      blocks: decremented once per block processed; whatever is
 *           left is returned in w0
 *   x3      dg_size: bits 6, 4 and 2 select how many input bytes are
 *           XORed into the state per block (72, 104, 136 or 144)
 *   w8      round counter, 24 down to 0 for every block
 *   x9      cursor into .Lsha3_rcon, advanced 8 bytes per round
 *   v25-v31 temporaries
 *
 * NOTE(review): w2 is decremented before it is ever tested, so the code
 * as written expects blocks >= 1 - confirm callers guarantee this.
 * NOTE(review): v8-v15 are overwritten without being saved here;
 * presumably the caller owns the whole SIMD register file - verify.
 */
43 | SYM_FUNC_START(sha3_ce_transform) |
44 | /* load state */ |
// x8 walks the state from byte 32 onwards so that x0 stays unchanged
// until the state is written back at label 4.
45 | add x8, x0, #32 |
46 | ld1 { v0.1d- v3.1d}, [x0] |
47 | ld1 { v4.1d- v7.1d}, [x8], #32 |
48 | ld1 { v8.1d-v11.1d}, [x8], #32 |
49 | ld1 {v12.1d-v15.1d}, [x8], #32 |
50 | ld1 {v16.1d-v19.1d}, [x8], #32 |
51 | ld1 {v20.1d-v23.1d}, [x8], #32 |
52 | ld1 {v24.1d}, [x8] |
53 | |
// Per-block loop: consume one block, then reset the round counter and
// the round-constant cursor for it.
54 | 0: sub w2, w2, #1 |
55 | mov w8, #24 |
56 | adr_l x9, .Lsha3_rcon |
57 | |
58 | /* load input */ |
// The first 56 bytes (state words 0-6) are absorbed for every variant.
59 | ld1 {v25.8b-v28.8b}, [x1], #32 |
60 | ld1 {v29.8b-v31.8b}, [x1], #24 |
61 | eor v0.8b, v0.8b, v25.8b |
62 | eor v1.8b, v1.8b, v26.8b |
63 | eor v2.8b, v2.8b, v27.8b |
64 | eor v3.8b, v3.8b, v28.8b |
65 | eor v4.8b, v4.8b, v29.8b |
66 | eor v5.8b, v5.8b, v30.8b |
67 | eor v6.8b, v6.8b, v31.8b |
68 | |
69 | tbnz x3, #6, 2f // SHA3-512 |
70 | |
// dg_size bit 6 clear: absorb 48 more bytes (state words 7-12),
// 104 bytes so far.
71 | ld1 {v25.8b-v28.8b}, [x1], #32 |
72 | ld1 {v29.8b-v30.8b}, [x1], #16 |
73 | eor v7.8b, v7.8b, v25.8b |
74 | eor v8.8b, v8.8b, v26.8b |
75 | eor v9.8b, v9.8b, v27.8b |
76 | eor v10.8b, v10.8b, v28.8b |
77 | eor v11.8b, v11.8b, v29.8b |
78 | eor v12.8b, v12.8b, v30.8b |
79 | |
80 | tbnz x3, #4, 1f // SHA3-384 or SHA3-224 |
81 | |
82 | // SHA3-256 |
// 32 more bytes (state words 13-16): 136 bytes per block.
83 | ld1 {v25.8b-v28.8b}, [x1], #32 |
84 | eor v13.8b, v13.8b, v25.8b |
85 | eor v14.8b, v14.8b, v26.8b |
86 | eor v15.8b, v15.8b, v27.8b |
87 | eor v16.8b, v16.8b, v28.8b |
88 | b 3f |
89 | |
// With bit 2 clear the block is complete at 104 bytes.
90 | 1: tbz x3, #2, 3f // bit 2 cleared? SHA-384 |
91 | |
92 | // SHA3-224 |
// 40 more bytes (state words 13-17): 144 bytes per block.
93 | ld1 {v25.8b-v28.8b}, [x1], #32 |
94 | ld1 {v29.8b}, [x1], #8 |
95 | eor v13.8b, v13.8b, v25.8b |
96 | eor v14.8b, v14.8b, v26.8b |
97 | eor v15.8b, v15.8b, v27.8b |
98 | eor v16.8b, v16.8b, v28.8b |
99 | eor v17.8b, v17.8b, v29.8b |
100 | b 3f |
101 | |
102 | // SHA3-512 |
// 16 more bytes (state words 7-8): 72 bytes per block.
103 | 2: ld1 {v25.8b-v26.8b}, [x1], #16 |
104 | eor v7.8b, v7.8b, v25.8b |
105 | eor v8.8b, v8.8b, v26.8b |
106 | |
// One permutation round; the loop back to label 3 runs it 24 times per
// block, with w8 counting down.
107 | 3: sub w8, w8, #1 |
108 | |
/*
 * Column parities: v25-v29 each become the XOR of the five state words
 * of one column (v25 = v0^v5^v10^v15^v20, v26 = v1^v6^v11^v16^v21, ...,
 * v29 = v4^v9^v14^v19^v24).
 */
109 | eor3 v29.16b, v4.16b, v9.16b, v14.16b |
110 | eor3 v26.16b, v1.16b, v6.16b, v11.16b |
111 | eor3 v28.16b, v3.16b, v8.16b, v13.16b |
112 | eor3 v25.16b, v0.16b, v5.16b, v10.16b |
113 | eor3 v27.16b, v2.16b, v7.16b, v12.16b |
114 | eor3 v29.16b, v29.16b, v19.16b, v24.16b |
115 | eor3 v26.16b, v26.16b, v16.16b, v21.16b |
116 | eor3 v28.16b, v28.16b, v18.16b, v23.16b |
117 | eor3 v25.16b, v25.16b, v15.16b, v20.16b |
118 | eor3 v27.16b, v27.16b, v17.16b, v22.16b |
119 | |
/*
 * Combine each parity with another parity rotated left by one bit.
 * The value for column 0 goes to v30 so that v29 is still intact for
 * the last rax1; v25-v28 are updated in place, each before the line
 * that would otherwise need its old value has been passed.
 */
120 | rax1 v30.2d, v29.2d, v26.2d // bc[0] |
121 | rax1 v26.2d, v26.2d, v28.2d // bc[2] |
122 | rax1 v28.2d, v28.2d, v25.2d // bc[4] |
123 | rax1 v25.2d, v25.2d, v27.2d // bc[1] |
124 | rax1 v27.2d, v27.2d, v29.2d // bc[3] |
125 | |
/*
 * XOR every state word with its column's value from above and rotate
 * it (xar rotates right by 64 - n, i.e. left by n), writing the result
 * to a different register in most cases.  State word 0 has no
 * rotation and is handled by the plain eor.  Each xar overwrites a
 * register whose previous contents were consumed by an earlier line
 * or by that same instruction, so the order of this sequence must not
 * be changed.  Afterwards v25-v31 hold state words rather than
 * temporaries.
 */
126 | eor v0.16b, v0.16b, v30.16b |
127 | xar v29.2d, v1.2d, v25.2d, (64 - 1) |
128 | xar v1.2d, v6.2d, v25.2d, (64 - 44) |
129 | xar v6.2d, v9.2d, v28.2d, (64 - 20) |
130 | xar v9.2d, v22.2d, v26.2d, (64 - 61) |
131 | xar v22.2d, v14.2d, v28.2d, (64 - 39) |
132 | xar v14.2d, v20.2d, v30.2d, (64 - 18) |
133 | xar v31.2d, v2.2d, v26.2d, (64 - 62) |
134 | xar v2.2d, v12.2d, v26.2d, (64 - 43) |
135 | xar v12.2d, v13.2d, v27.2d, (64 - 25) |
136 | xar v13.2d, v19.2d, v28.2d, (64 - 8) |
137 | xar v19.2d, v23.2d, v27.2d, (64 - 56) |
138 | xar v23.2d, v15.2d, v30.2d, (64 - 41) |
139 | xar v15.2d, v4.2d, v28.2d, (64 - 27) |
140 | xar v28.2d, v24.2d, v28.2d, (64 - 14) |
141 | xar v24.2d, v21.2d, v25.2d, (64 - 2) |
142 | xar v8.2d, v8.2d, v27.2d, (64 - 55) |
143 | xar v4.2d, v16.2d, v25.2d, (64 - 45) |
144 | xar v16.2d, v5.2d, v30.2d, (64 - 36) |
145 | xar v5.2d, v3.2d, v27.2d, (64 - 28) |
146 | xar v27.2d, v18.2d, v27.2d, (64 - 21) |
147 | xar v3.2d, v17.2d, v26.2d, (64 - 15) |
148 | xar v25.2d, v11.2d, v25.2d, (64 - 10) |
149 | xar v26.2d, v7.2d, v26.2d, (64 - 6) |
150 | xar v30.2d, v10.2d, v30.2d, (64 - 3) |
151 | |
/*
 * Non-linear step, five words per group: bcax d, a, b, c computes
 * d = a ^ (b & ~c).  Results land back in v0-v24.  Within a group the
 * first two writes go to registers that are not inputs of that group
 * and the in-place updates follow; the groups are ordered so that v8,
 * v3 and v4 have already been consumed by the time they are
 * overwritten.
 */
152 | bcax v20.16b, v31.16b, v22.16b, v8.16b |
153 | bcax v21.16b, v8.16b, v23.16b, v22.16b |
154 | bcax v22.16b, v22.16b, v24.16b, v23.16b |
155 | bcax v23.16b, v23.16b, v31.16b, v24.16b |
156 | bcax v24.16b, v24.16b, v8.16b, v31.16b |
157 | |
// v31 is free from here on: fetch this round's constant into both
// 64-bit halves and advance x9 to the next one.
158 | ld1r {v31.2d}, [x9], #8 |
159 | |
160 | bcax v17.16b, v25.16b, v19.16b, v3.16b |
161 | bcax v18.16b, v3.16b, v15.16b, v19.16b |
162 | bcax v19.16b, v19.16b, v16.16b, v15.16b |
163 | bcax v15.16b, v15.16b, v25.16b, v16.16b |
164 | bcax v16.16b, v16.16b, v3.16b, v25.16b |
165 | |
166 | bcax v10.16b, v29.16b, v12.16b, v26.16b |
167 | bcax v11.16b, v26.16b, v13.16b, v12.16b |
168 | bcax v12.16b, v12.16b, v14.16b, v13.16b |
169 | bcax v13.16b, v13.16b, v29.16b, v14.16b |
170 | bcax v14.16b, v14.16b, v26.16b, v29.16b |
171 | |
172 | bcax v7.16b, v30.16b, v9.16b, v4.16b |
173 | bcax v8.16b, v4.16b, v5.16b, v9.16b |
174 | bcax v9.16b, v9.16b, v6.16b, v5.16b |
175 | bcax v5.16b, v5.16b, v30.16b, v6.16b |
176 | bcax v6.16b, v6.16b, v4.16b, v30.16b |
177 | |
178 | bcax v3.16b, v27.16b, v0.16b, v28.16b |
179 | bcax v4.16b, v28.16b, v1.16b, v0.16b |
180 | bcax v0.16b, v0.16b, v2.16b, v1.16b |
181 | bcax v1.16b, v1.16b, v27.16b, v2.16b |
182 | bcax v2.16b, v2.16b, v28.16b, v27.16b |
183 | |
// XOR the round constant into state word 0.
184 | eor v0.16b, v0.16b, v31.16b |
185 | |
// Next round, if any remain for this block.
186 | cbnz w8, 3b |
/*
 * cond_yield is not defined in this file (presumably it comes from
 * asm/assembler.h).  As used here it can divert to label 4 before all
 * blocks are done; x8 and x9 look like scratch registers handed to it.
 * TODO confirm against the macro definition.
 */
187 | cond_yield 4f, x8, x9 |
// More blocks to absorb?
188 | cbnz w2, 0b |
189 | |
190 | /* save state */ |
// x0 is advanced while storing.  The return value is the number of
// blocks not processed: 0 when the loop above ran to completion.
191 | 4: st1 { v0.1d- v3.1d}, [x0], #32 |
192 | st1 { v4.1d- v7.1d}, [x0], #32 |
193 | st1 { v8.1d-v11.1d}, [x0], #32 |
194 | st1 {v12.1d-v15.1d}, [x0], #32 |
195 | st1 {v16.1d-v19.1d}, [x0], #32 |
196 | st1 {v20.1d-v23.1d}, [x0], #32 |
197 | st1 {v24.1d}, [x0] |
198 | mov w0, w2 |
199 | ret |
200 | SYM_FUNC_END(sha3_ce_transform) |
201 | |
/*
 * The 24 64-bit round constants, stored in the order the round loop in
 * sha3_ce_transform reads them (one per round, via a post-incremented
 * pointer that starts at .Lsha3_rcon).
 *
 * On this target .align takes a power-of-two exponent, so ".align 8"
 * requests 256-byte alignment.
 */
	.section	".rodata", "a"
	.align		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082	// rounds  0,  1
	.quad	0x800000000000808a, 0x8000000080008000	// rounds  2,  3
	.quad	0x000000000000808b, 0x0000000080000001	// rounds  4,  5
	.quad	0x8000000080008081, 0x8000000000008009	// rounds  6,  7
	.quad	0x000000000000008a, 0x0000000000000088	// rounds  8,  9
	.quad	0x0000000080008009, 0x000000008000000a	// rounds 10, 11
	.quad	0x000000008000808b, 0x800000000000008b	// rounds 12, 13
	.quad	0x8000000000008089, 0x8000000000008003	// rounds 14, 15
	.quad	0x8000000000008002, 0x8000000000000080	// rounds 16, 17
	.quad	0x000000000000800a, 0x800000008000000a	// rounds 18, 19
	.quad	0x8000000080008081, 0x8000000000008080	// rounds 20, 21
	.quad	0x0000000080000001, 0x8000000080008008	// rounds 22, 23
213 | |