/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated AES-GCM stitched implementation for ppc64le.
#
# Copyright 2022- IBM Inc. All rights reserved
#
#===================================================================================
# Written by Danny Tsen <dtsen@linux.ibm.com>
#
# GHASH is based on the Karatsuba multiplication method.
#
#	Xi xor X1
#
#	X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
#	  (X1.h * H^4.h + X1.l * H^4.l + X1 * H^4) +
#	  (X2.h * H^3.h + X2.l * H^3.l + X2 * H^3) +
#	  (X3.h * H^2.h + X3.l * H^2.l + X3 * H^2) +
#	  (X4.h * H.h + X4.l * H.l + X4 * H)
#
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
#	( H.l, H, H.h)
#	( H^2.l, H^2, H^2.h)
#	( H^3.l, H^3, H^3.h)
#	( H^4.l, H^4, H^4.h)
#
# v30 is IV
# v31 - counter 1
#
# AES used,
#	vs0 - vs14 for round keys
#	v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
# This implementation uses a stitched AES-GCM approach to improve overall performance.
# AES is computed on 8x blocks at a time and GHASH uses two 4x blocks.
#
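# As a rough, illustrative C sketch of one main-loop iteration (names are
# placeholders, not the exact flow below):
#
#	for each 128-byte chunk {
#		for (j = 0; j < 8; j++)		/* AES-CTR on 8 counter blocks */
#			ks[j] = aes_encrypt(rk, ctr++);
#		for (j = 0; j < 8; j++)		/* produce 8 output blocks */
#			out[j] = in[j] ^ ks[j];
#		/* GHASH the 8 ciphertext blocks as two 4x Karatsuba passes */
#		Xi = ghash_4x(Xi, c[0..3]);
#		Xi = ghash_4x(Xi, c[4..7]);
#	}
#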
# ===================================================================================
#

#include <asm/ppc_asm.h>
#include <linux/linkage.h>

.machine "any"
.text

# 4x loops
# v15 - v18 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle4x
	xxlor	19+32, 1, 1
	xxlor	20+32, 2, 2
	xxlor	21+32, 3, 3
	xxlor	22+32, 4, 4

	vcipher	15, 15, 19
	vcipher	16, 16, 19
	vcipher	17, 17, 19
	vcipher	18, 18, 19

	vcipher	15, 15, 20
	vcipher	16, 16, 20
	vcipher	17, 17, 20
	vcipher	18, 18, 20

	vcipher	15, 15, 21
	vcipher	16, 16, 21
	vcipher	17, 17, 21
	vcipher	18, 18, 21

	vcipher	15, 15, 22
	vcipher	16, 16, 22
	vcipher	17, 17, 22
	vcipher	18, 18, 22

	xxlor	19+32, 5, 5
	xxlor	20+32, 6, 6
	xxlor	21+32, 7, 7
	xxlor	22+32, 8, 8

	vcipher	15, 15, 19
	vcipher	16, 16, 19
	vcipher	17, 17, 19
	vcipher	18, 18, 19

	vcipher	15, 15, 20
	vcipher	16, 16, 20
	vcipher	17, 17, 20
	vcipher	18, 18, 20

	vcipher	15, 15, 21
	vcipher	16, 16, 21
	vcipher	17, 17, 21
	vcipher	18, 18, 21

	vcipher	15, 15, 22
	vcipher	16, 16, 22
	vcipher	17, 17, 22
	vcipher	18, 18, 22

	xxlor	23+32, 9, 9
	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
.endm

# 8x loops
# v15 - v22 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle8x
	xxlor	23+32, 1, 1
	xxlor	24+32, 2, 2
	xxlor	25+32, 3, 3
	xxlor	26+32, 4, 4

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	vcipher	15, 15, 25
	vcipher	16, 16, 25
	vcipher	17, 17, 25
	vcipher	18, 18, 25
	vcipher	19, 19, 25
	vcipher	20, 20, 25
	vcipher	21, 21, 25
	vcipher	22, 22, 25

	vcipher	15, 15, 26
	vcipher	16, 16, 26
	vcipher	17, 17, 26
	vcipher	18, 18, 26
	vcipher	19, 19, 26
	vcipher	20, 20, 26
	vcipher	21, 21, 26
	vcipher	22, 22, 26

	xxlor	23+32, 5, 5
	xxlor	24+32, 6, 6
	xxlor	25+32, 7, 7
	xxlor	26+32, 8, 8

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	vcipher	15, 15, 25
	vcipher	16, 16, 25
	vcipher	17, 17, 25
	vcipher	18, 18, 25
	vcipher	19, 19, 25
	vcipher	20, 20, 25
	vcipher	21, 21, 25
	vcipher	22, 22, 25

	vcipher	15, 15, 26
	vcipher	16, 16, 26
	vcipher	17, 17, 26
	vcipher	18, 18, 26
	vcipher	19, 19, 26
	vcipher	20, 20, 26
	vcipher	21, 21, 26
	vcipher	22, 22, 26

	xxlor	23+32, 9, 9
	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23
.endm

.macro Loop_aes_middle_1x
	xxlor	19+32, 1, 1
	xxlor	20+32, 2, 2
	xxlor	21+32, 3, 3
	xxlor	22+32, 4, 4

	vcipher	15, 15, 19
	vcipher	15, 15, 20
	vcipher	15, 15, 21
	vcipher	15, 15, 22

	xxlor	19+32, 5, 5
	xxlor	20+32, 6, 6
	xxlor	21+32, 7, 7
	xxlor	22+32, 8, 8

	vcipher	15, 15, 19
	vcipher	15, 15, 20
	vcipher	15, 15, 21
	vcipher	15, 15, 22

	xxlor	19+32, 9, 9
	vcipher	15, 15, 19
.endm

#
# Compute 4x hash values based on the Karatsuba method.
#
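# Each 128x128-bit carry-less product is assembled from three vpmsumd
# partial products (Karatsuba-style): L = X.l * H.l, M = X.l * H.h +
# X.h * H.l, and H = X.h * H.h. M is split into halves (mL/mH), folded
# into L and H, and the 256-bit result is reduced with the GHASH
# polynomial in v2 via two vpmsumd reduction steps.
#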
.macro ppc_aes_gcm_ghash
	vxor	15, 15, 0

	vpmsumd	23, 12, 15		# H^4.L * X1.L
	vpmsumd	24, 9, 16		# H^3.L * X2.L
	vpmsumd	25, 6, 17		# H^2.L * X3.L
	vpmsumd	26, 3, 18		# H.L * X4.L

	vxor	23, 23, 24
	vxor	23, 23, 25
	vxor	23, 23, 26		# L

	vpmsumd	24, 13, 15		# H^4.L * X1.H + H^4.H * X1.L
	vpmsumd	25, 10, 16		# H^3.L * X2.H + H^3.H * X2.L
	vpmsumd	26, 7, 17		# H^2.L * X3.H + H^2.H * X3.L
	vpmsumd	27, 4, 18		# H.L * X4.H + H.H * X4.L

	vxor	24, 24, 25
	vxor	24, 24, 26
	vxor	24, 24, 27		# M

	# sum hash and reduction with H Poly
	vpmsumd	28, 23, 2		# reduction

	vxor	29, 29, 29
	vsldoi	26, 24, 29, 8		# mL
	vsldoi	29, 29, 24, 8		# mH
	vxor	23, 23, 26		# mL + L

	vsldoi	23, 23, 23, 8		# swap
	vxor	23, 23, 28

	vpmsumd	24, 14, 15		# H^4.H * X1.H
	vpmsumd	25, 11, 16		# H^3.H * X2.H
	vpmsumd	26, 8, 17		# H^2.H * X3.H
	vpmsumd	27, 5, 18		# H.H * X4.H

	vxor	24, 24, 25
	vxor	24, 24, 26
	vxor	24, 24, 27

	vxor	24, 24, 29

	# sum hash and reduction with H Poly
	vsldoi	27, 23, 23, 8		# swap
	vpmsumd	23, 23, 2
	vxor	27, 27, 24
	vxor	23, 23, 27

	xxlor	32, 23+32, 23+32	# update hash

.endm

#
# Combine two 4x GHASH computations.
# v15 - v22 - input blocks
#
.macro ppc_aes_gcm_ghash2_4x
	# first 4x hash
	vxor	15, 15, 0		# Xi + X1

	vpmsumd	23, 12, 15		# H^4.L * X1.L
	vpmsumd	24, 9, 16		# H^3.L * X2.L
	vpmsumd	25, 6, 17		# H^2.L * X3.L
	vpmsumd	26, 3, 18		# H.L * X4.L

	vxor	23, 23, 24
	vxor	23, 23, 25
	vxor	23, 23, 26		# L

	vpmsumd	24, 13, 15		# H^4.L * X1.H + H^4.H * X1.L
	vpmsumd	25, 10, 16		# H^3.L * X2.H + H^3.H * X2.L
	vpmsumd	26, 7, 17		# H^2.L * X3.H + H^2.H * X3.L
	vpmsumd	27, 4, 18		# H.L * X4.H + H.H * X4.L

	vxor	24, 24, 25
	vxor	24, 24, 26

	# sum hash and reduction with H Poly
	vpmsumd	28, 23, 2		# reduction

	vxor	29, 29, 29

	vxor	24, 24, 27		# M
	vsldoi	26, 24, 29, 8		# mL
	vsldoi	29, 29, 24, 8		# mH
	vxor	23, 23, 26		# mL + L

	vsldoi	23, 23, 23, 8		# swap
	vxor	23, 23, 28

	vpmsumd	24, 14, 15		# H^4.H * X1.H
	vpmsumd	25, 11, 16		# H^3.H * X2.H
	vpmsumd	26, 8, 17		# H^2.H * X3.H
	vpmsumd	27, 5, 18		# H.H * X4.H

	vxor	24, 24, 25
	vxor	24, 24, 26
	vxor	24, 24, 27		# H

	vxor	24, 24, 29		# H + mH

	# sum hash and reduction with H Poly
	vsldoi	27, 23, 23, 8		# swap
	vpmsumd	23, 23, 2
	vxor	27, 27, 24
	vxor	27, 23, 27		# 1st Xi

	# 2nd 4x hash
	vpmsumd	24, 9, 20		# H^3.L * X6.L
	vpmsumd	25, 6, 21		# H^2.L * X7.L
	vpmsumd	26, 3, 22		# H.L * X8.L
	vxor	19, 19, 27		# Xi + X5
	vpmsumd	23, 12, 19		# H^4.L * X5.L

	vxor	23, 23, 24
	vxor	23, 23, 25
	vxor	23, 23, 26		# L

	vpmsumd	24, 13, 19		# H^4.L * X5.H + H^4.H * X5.L
	vpmsumd	25, 10, 20		# H^3.L * X6.H + H^3.H * X6.L
	vpmsumd	26, 7, 21		# H^2.L * X7.H + H^2.H * X7.L
	vpmsumd	27, 4, 22		# H.L * X8.H + H.H * X8.L

	vxor	24, 24, 25
	vxor	24, 24, 26

	# sum hash and reduction with H Poly
	vpmsumd	28, 23, 2		# reduction

	vxor	29, 29, 29

	vxor	24, 24, 27		# M
	vsldoi	26, 24, 29, 8		# mL
	vsldoi	29, 29, 24, 8		# mH
	vxor	23, 23, 26		# mL + L

	vsldoi	23, 23, 23, 8		# swap
	vxor	23, 23, 28

	vpmsumd	24, 14, 19		# H^4.H * X5.H
	vpmsumd	25, 11, 20		# H^3.H * X6.H
	vpmsumd	26, 8, 21		# H^2.H * X7.H
	vpmsumd	27, 5, 22		# H.H * X8.H

	vxor	24, 24, 25
	vxor	24, 24, 26
	vxor	24, 24, 27		# H

	vxor	24, 24, 29		# H + mH

	# sum hash and reduction with H Poly
	vsldoi	27, 23, 23, 8		# swap
	vpmsumd	23, 23, 2
	vxor	27, 27, 24
	vxor	23, 23, 27

	xxlor	32, 23+32, 23+32	# update hash

.endm

#
# Compute and update a single hash value.
#
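# Conceptually this is Xi = (Xi ^ block) * H mod g(x), with
# g(x) = x^128 + x^7 + x^2 + x + 1 (the GHASH polynomial), computed as
# L/M/H partial products (v22/v23/v24) followed by the same two-step
# vpmsumd reduction used by the 4x macros above.
#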
.macro ppc_update_hash_1x
	vxor	28, 28, 0

	vxor	19, 19, 19

	vpmsumd	22, 3, 28		# L
	vpmsumd	23, 4, 28		# M
	vpmsumd	24, 5, 28		# H

	vpmsumd	27, 22, 2		# reduction

	vsldoi	25, 23, 19, 8		# mL
	vsldoi	26, 19, 23, 8		# mH
	vxor	22, 22, 25		# L + mL
	vxor	24, 24, 26		# H + mH

	vsldoi	22, 22, 22, 8		# swap
	vxor	22, 22, 27

	vsldoi	20, 22, 22, 8		# swap
	vpmsumd	22, 22, 2		# reduction
	vxor	20, 20, 24
	vxor	22, 22, 20

	vmr	0, 22			# update hash

.endm

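#
# Stack frame used by SAVE_REGS/RESTORE_REGS (640 bytes; offsets are from
# the updated r1, derived from the stores below):
#	112 - 175	non-volatile GPRs r14 - r21
#	192 - 255	scratch space for partial-block masks and data
#	256 - 447	non-volatile VRs v20 - v31
#	464 - 607	non-volatile VSRs vs14 - vs22
#	656		saved LR (the slot in the caller's frame)
#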
.macro SAVE_REGS
	stdu	1, -640(1)
	mflr	0

	std	14, 112(1)
	std	15, 120(1)
	std	16, 128(1)
	std	17, 136(1)
	std	18, 144(1)
	std	19, 152(1)
	std	20, 160(1)
	std	21, 168(1)
	li	9, 256
	stvx	20, 9, 1
	addi	9, 9, 16
	stvx	21, 9, 1
	addi	9, 9, 16
	stvx	22, 9, 1
	addi	9, 9, 16
	stvx	23, 9, 1
	addi	9, 9, 16
	stvx	24, 9, 1
	addi	9, 9, 16
	stvx	25, 9, 1
	addi	9, 9, 16
	stvx	26, 9, 1
	addi	9, 9, 16
	stvx	27, 9, 1
	addi	9, 9, 16
	stvx	28, 9, 1
	addi	9, 9, 16
	stvx	29, 9, 1
	addi	9, 9, 16
	stvx	30, 9, 1
	addi	9, 9, 16
	stvx	31, 9, 1
	stxv	14, 464(1)
	stxv	15, 480(1)
	stxv	16, 496(1)
	stxv	17, 512(1)
	stxv	18, 528(1)
	stxv	19, 544(1)
	stxv	20, 560(1)
	stxv	21, 576(1)
	stxv	22, 592(1)
	std	0, 656(1)
.endm

.macro RESTORE_REGS
	lxv	14, 464(1)
	lxv	15, 480(1)
	lxv	16, 496(1)
	lxv	17, 512(1)
	lxv	18, 528(1)
	lxv	19, 544(1)
	lxv	20, 560(1)
	lxv	21, 576(1)
	lxv	22, 592(1)
	li	9, 256
	lvx	20, 9, 1
	addi	9, 9, 16
	lvx	21, 9, 1
	addi	9, 9, 16
	lvx	22, 9, 1
	addi	9, 9, 16
	lvx	23, 9, 1
	addi	9, 9, 16
	lvx	24, 9, 1
	addi	9, 9, 16
	lvx	25, 9, 1
	addi	9, 9, 16
	lvx	26, 9, 1
	addi	9, 9, 16
	lvx	27, 9, 1
	addi	9, 9, 16
	lvx	28, 9, 1
	addi	9, 9, 16
	lvx	29, 9, 1
	addi	9, 9, 16
	lvx	30, 9, 1
	addi	9, 9, 16
	lvx	31, 9, 1

	ld	0, 656(1)
	ld	14, 112(1)
	ld	15, 120(1)
	ld	16, 128(1)
	ld	17, 136(1)
	ld	18, 144(1)
	ld	19, 152(1)
	ld	20, 160(1)
	ld	21, 168(1)

	mtlr	0
	addi	1, 1, 640
.endm

.macro LOAD_HASH_TABLE
	# Load Xi
	lxvb16x	32, 0, 8		# load Xi

	# load Hash - h^4, h^3, h^2, h
	li	10, 32
	lxvd2x	2+32, 10, 8		# H Poly
	li	10, 48
	lxvd2x	3+32, 10, 8		# Hl
	li	10, 64
	lxvd2x	4+32, 10, 8		# H
	li	10, 80
	lxvd2x	5+32, 10, 8		# Hh

	li	10, 96
	lxvd2x	6+32, 10, 8		# H^2l
	li	10, 112
	lxvd2x	7+32, 10, 8		# H^2
	li	10, 128
	lxvd2x	8+32, 10, 8		# H^2h

	li	10, 144
	lxvd2x	9+32, 10, 8		# H^3l
	li	10, 160
	lxvd2x	10+32, 10, 8		# H^3
	li	10, 176
	lxvd2x	11+32, 10, 8		# H^3h

	li	10, 192
	lxvd2x	12+32, 10, 8		# H^4l
	li	10, 208
	lxvd2x	13+32, 10, 8		# H^4
	li	10, 224
	lxvd2x	14+32, 10, 8		# H^4h
.endm
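
# gcm_table layout assumed by LOAD_HASH_TABLE (byte offsets from r8):
#	0: Xi		32: H Poly	48: Hl		64: H		80: Hh
#	96: H^2l	112: H^2	128: H^2h	144: H^3l	160: H^3
#	176: H^3h	192: H^4l	208: H^4	224: H^4h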

#
# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
#		const char *rk, unsigned char iv[16], void *Xip);
#
#	r3 - inp
#	r4 - out
#	r5 - len
#	r6 - AES round keys
#	r7 - iv and other data
#	r8 - Xi, HPoly, hash keys
#
# rounds is at offset 240 in rk
# Xi is at 0 in gcm_table (Xip).
#
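# A minimal C-side usage sketch (hypothetical caller; the actual kernel
# glue code that drives this routine is not shown here):
#
#	/* rk: expanded AES key schedule, round count at offset 240;
#	 * Xip: gcm_table with Xi at offset 0 and hash keys from 32 on. */
#	aes_p10_gcm_encrypt(src, dst, nbytes, rk, iv, Xip);
#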
_GLOBAL(aes_p10_gcm_encrypt)
.align 5

	SAVE_REGS

	LOAD_HASH_TABLE

	# initialize ICB: GHASH( IV ), IV - r7
	lxvb16x	30+32, 0, 7		# load IV - v30

	mr	12, 5			# length
	li	11, 0			# block index

	# counter 1
	vxor	31, 31, 31
	vspltisb 22, 1
	vsldoi	31, 31, 22, 1		# counter 1

	# load round key to VSR
	lxv	0, 0(6)
	lxv	1, 0x10(6)
	lxv	2, 0x20(6)
	lxv	3, 0x30(6)
	lxv	4, 0x40(6)
	lxv	5, 0x50(6)
	lxv	6, 0x60(6)
	lxv	7, 0x70(6)
	lxv	8, 0x80(6)
	lxv	9, 0x90(6)
	lxv	10, 0xa0(6)

	# load rounds - 10 (128), 12 (192), 14 (256)
	lwz	9, 240(6)

	#
	# vxor	state, state, w	# addroundkey
	xxlor	32+29, 0, 0
	vxor	15, 30, 29		# IV + round key - add round key 0

	cmpdi	9, 10
	beq	Loop_aes_gcm_8x

	# load 2 more round keys (v11, v12)
	lxv	11, 0xb0(6)
	lxv	12, 0xc0(6)

	cmpdi	9, 12
	beq	Loop_aes_gcm_8x

	# load 2 more round keys (v13, v14)
	lxv	13, 0xd0(6)
	lxv	14, 0xe0(6)
	cmpdi	9, 14
	beq	Loop_aes_gcm_8x

	b	aes_gcm_out

.align 5
Loop_aes_gcm_8x:
	mr	14, 3
	mr	9, 4

	#
	# check partial block
	#
Continue_partial_check:
	ld	15, 56(7)
	cmpdi	15, 0
	beq	Continue
	bgt	Final_block
	cmpdi	15, 16
	blt	Final_block

Continue:
	# n blocks
	li	10, 128
	divdu	10, 12, 10		# n 128-byte blocks
	cmpdi	10, 0
	beq	Loop_last_block

	vaddudm	30, 30, 31		# IV + counter
	vxor	16, 30, 29
	vaddudm	30, 30, 31
	vxor	17, 30, 29
	vaddudm	30, 30, 31
	vxor	18, 30, 29
	vaddudm	30, 30, 31
	vxor	19, 30, 29
	vaddudm	30, 30, 31
	vxor	20, 30, 29
	vaddudm	30, 30, 31
	vxor	21, 30, 29
	vaddudm	30, 30, 31
	vxor	22, 30, 29

	mtctr	10

	li	15, 16
	li	16, 32
	li	17, 48
	li	18, 64
	li	19, 80
	li	20, 96
	li	21, 112

	lwz	10, 240(6)

Loop_8x_block:

	lxvb16x	15, 0, 14		# load block
	lxvb16x	16, 15, 14		# load block
	lxvb16x	17, 16, 14		# load block
	lxvb16x	18, 17, 14		# load block
	lxvb16x	19, 18, 14		# load block
	lxvb16x	20, 19, 14		# load block
	lxvb16x	21, 20, 14		# load block
	lxvb16x	22, 21, 14		# load block
	addi	14, 14, 128

	Loop_aes_middle8x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_ghash

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_ghash

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_ghash
	b	aes_gcm_out

Do_next_ghash:

	#
	# last round
	vcipherlast	15, 15, 23
	vcipherlast	16, 16, 23

	xxlxor	47, 47, 15
	stxvb16x	47, 0, 9	# store output
	xxlxor	48, 48, 16
	stxvb16x	48, 15, 9	# store output

	vcipherlast	17, 17, 23
	vcipherlast	18, 18, 23

	xxlxor	49, 49, 17
	stxvb16x	49, 16, 9	# store output
	xxlxor	50, 50, 18
	stxvb16x	50, 17, 9	# store output

	vcipherlast	19, 19, 23
	vcipherlast	20, 20, 23

	xxlxor	51, 51, 19
	stxvb16x	51, 18, 9	# store output
	xxlxor	52, 52, 20
	stxvb16x	52, 19, 9	# store output

	vcipherlast	21, 21, 23
	vcipherlast	22, 22, 23

	xxlxor	53, 53, 21
	stxvb16x	53, 20, 9	# store output
	xxlxor	54, 54, 22
	stxvb16x	54, 21, 9	# store output

	addi	9, 9, 128

	# ghash here
	ppc_aes_gcm_ghash2_4x

	xxlor	27+32, 0, 0
	vaddudm	30, 30, 31		# IV + counter
	vmr	29, 30
	vxor	15, 30, 27		# add round key
	vaddudm	30, 30, 31
	vxor	16, 30, 27
	vaddudm	30, 30, 31
	vxor	17, 30, 27
	vaddudm	30, 30, 31
	vxor	18, 30, 27
	vaddudm	30, 30, 31
	vxor	19, 30, 27
	vaddudm	30, 30, 31
	vxor	20, 30, 27
	vaddudm	30, 30, 31
	vxor	21, 30, 27
	vaddudm	30, 30, 31
	vxor	22, 30, 27

	addi	12, 12, -128
	addi	11, 11, 128

	bdnz	Loop_8x_block

	vmr	30, 29
	stxvb16x	30+32, 0, 7	# update IV

Loop_last_block:
	cmpdi	12, 0
	beq	aes_gcm_out

	# loop last few blocks
	li	10, 16
	divdu	10, 12, 10

	mtctr	10

	lwz	10, 240(6)

	cmpdi	12, 16
	blt	Final_block

Next_rem_block:
	lxvb16x	15, 0, 14		# load block

	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_1x

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_1x

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_1x

Do_next_1x:
	vcipherlast	15, 15, 23

	xxlxor	47, 47, 15
	stxvb16x	47, 0, 9	# store output
	addi	14, 14, 16
	addi	9, 9, 16

	vmr	28, 15
	ppc_update_hash_1x

	addi	12, 12, -16
	addi	11, 11, 16
	xxlor	19+32, 0, 0
	vaddudm	30, 30, 31		# IV + counter
	vxor	15, 30, 19		# add round key

	bdnz	Next_rem_block

	li	15, 0
	std	15, 56(7)		# clear partial block flag
	stxvb16x	30+32, 0, 7	# update IV
	cmpdi	12, 0
	beq	aes_gcm_out

Final_block:
	lwz	10, 240(6)
	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_final_1x

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_final_1x

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_final_1x

Do_final_1x:
	vcipherlast	15, 15, 23

	# check partial block
	li	21, 0			# encrypt
	ld	15, 56(7)		# partial?
	cmpdi	15, 0
	beq	Normal_block
	bl	Do_partial_block

	cmpdi	12, 0
	ble	aes_gcm_out

	b	Continue_partial_check

Normal_block:
	lxvb16x	15, 0, 14		# load last block
	xxlxor	47, 47, 15

	# create partial block mask
	li	15, 16
	sub	15, 15, 12		# index to the mask

	vspltisb 16, -1			# first 16 bytes - 0xffff...ff
	vspltisb 17, 0			# second 16 bytes - 0x0000...00
	li	10, 192
	stvx	16, 10, 1
	addi	10, 10, 16
	stvx	17, 10, 1

	addi	10, 1, 192
	lxvb16x	16, 15, 10		# load partial block mask
	xxland	47, 47, 16

	vmr	28, 15
	ppc_update_hash_1x

	# store only the remaining bytes
	bl	Write_partial_block

	stxvb16x	30+32, 0, 7	# update IV
	std	12, 56(7)		# save partial block length
	li	16, 16

	stxvb16x	32, 0, 8	# write out Xi
	stxvb16x	32, 16, 8	# write out Xi
	b	aes_gcm_out

#
# Compute a data mask: _mask gets 0xff in bytes [_start, _start + _end)
# and zero elsewhere within the 16-byte window.
#
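# For example (hypothetical values), "GEN_MASK 18, 4, 8" leaves vs18 =
# 0x00000000ffffffffffffffff00000000: zeros in bytes [0, 4), 0xff in
# bytes [4, 12), zeros in bytes [12, 16).
#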
.macro GEN_MASK _mask _start _end
	vspltisb 16, -1			# first 16 bytes - 0xffff...ff
	vspltisb 17, 0			# second 16 bytes - 0x0000...00
	li	10, 192
	stxvb16x 17+32, 10, 1
	add	10, 10, \_start
	stxvb16x 16+32, 10, 1
	add	10, 10, \_end
	stxvb16x 17+32, 10, 1

	addi	10, 1, 192
	lxvb16x	\_mask, 0, 10		# load partial block mask
.endm

#
# Handle multiple partial blocks for encrypt and decrypt
# operations.
#
SYM_FUNC_START_LOCAL(Do_partial_block)
	add	17, 15, 5
	cmpdi	17, 16
	bgt	Big_block
	GEN_MASK 18, 15, 5
	b	_Partial
SYM_FUNC_END(Do_partial_block)
Big_block:
	li	16, 16
	GEN_MASK 18, 15, 16

_Partial:
	lxvb16x	17+32, 0, 14		# load last block
	sldi	16, 15, 3
	mtvsrdd	32+16, 0, 16
	vsro	17, 17, 16
	xxlxor	47, 47, 17+32
	xxland	47, 47, 18

	vxor	0, 0, 0			# clear Xi
	vmr	28, 15

	cmpdi	21, 0			# encrypt (0) or decrypt (1)?
	beq	Skip_decrypt
	xxland	32+28, 32+17, 18

Skip_decrypt:

	ppc_update_hash_1x

	li	16, 16
	lxvb16x	32+29, 16, 8
	vxor	0, 0, 29
	stxvb16x	32, 0, 8	# save Xi
	stxvb16x	32, 16, 8	# save Xi

	# store partial block
	# loop the rest of the stream if any
	sldi	16, 15, 3
	mtvsrdd	32+16, 0, 16
	vslo	15, 15, 16
	#stxvb16x	15+32, 0, 9	# last block

	li	16, 16
	sub	17, 16, 15		# 16 - partial

	add	16, 15, 5
	cmpdi	16, 16
	bgt	Larger_16
	mr	17, 5
Larger_16:

	# write partial
	li	10, 192
	stxvb16x	15+32, 10, 1	# save current block

	addi	10, 9, -1
	addi	16, 1, 191
	mtctr	17			# move partial byte count

Write_last_partial:
	lbzu	18, 1(16)
	stbu	18, 1(10)
	bdnz	Write_last_partial
	# partial block copy complete

	add	14, 14, 17
	add	9, 9, 17
	sub	12, 12, 17
	add	11, 11, 17

	add	15, 15, 5
	cmpdi	15, 16
	blt	Save_partial

	vaddudm	30, 30, 31
	stxvb16x	30+32, 0, 7	# update IV
	xxlor	32+29, 0, 0
	vxor	15, 30, 29		# IV + round key - add round key 0
	li	15, 0
	std	15, 56(7)		# partial done - clear
	b	Partial_done
Save_partial:
	std	15, 56(7)		# partial

Partial_done:
	blr

#
# Write partial block
# r9 - output
# r12 - remaining bytes
# v15 - partial input data
#
SYM_FUNC_START_LOCAL(Write_partial_block)
	li	10, 192
	stxvb16x	15+32, 10, 1	# last block

	addi	10, 9, -1
	addi	16, 1, 191

	mtctr	12			# remaining bytes
	li	15, 0

Write_last_byte:
	lbzu	14, 1(16)
	stbu	14, 1(10)
	bdnz	Write_last_byte
	blr
SYM_FUNC_END(Write_partial_block)

aes_gcm_out:
	# out = state
	stxvb16x	32, 0, 8	# write out Xi
	add	3, 11, 12		# return count

	RESTORE_REGS
	blr

#
# 8x Decrypt
#
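# aes_p10_gcm_decrypt (const void *inp, void *out, size_t len,
#		const char *rk, unsigned char iv[16], void *Xip);
# Takes the same arguments, in the same registers, as
# aes_p10_gcm_encrypt above.
#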
_GLOBAL(aes_p10_gcm_decrypt)
.align 5

	SAVE_REGS

	LOAD_HASH_TABLE

	# initialize ICB: GHASH( IV ), IV - r7
	lxvb16x	30+32, 0, 7		# load IV - v30

	mr	12, 5			# length
	li	11, 0			# block index

	# counter 1
	vxor	31, 31, 31
	vspltisb 22, 1
	vsldoi	31, 31, 22, 1		# counter 1

	# load round key to VSR
	lxv	0, 0(6)
	lxv	1, 0x10(6)
	lxv	2, 0x20(6)
	lxv	3, 0x30(6)
	lxv	4, 0x40(6)
	lxv	5, 0x50(6)
	lxv	6, 0x60(6)
	lxv	7, 0x70(6)
	lxv	8, 0x80(6)
	lxv	9, 0x90(6)
	lxv	10, 0xa0(6)

	# load rounds - 10 (128), 12 (192), 14 (256)
	lwz	9, 240(6)

	#
	# vxor	state, state, w	# addroundkey
	xxlor	32+29, 0, 0
	vxor	15, 30, 29		# IV + round key - add round key 0

	cmpdi	9, 10
	beq	Loop_aes_gcm_8x_dec

	# load 2 more round keys (v11, v12)
	lxv	11, 0xb0(6)
	lxv	12, 0xc0(6)

	cmpdi	9, 12
	beq	Loop_aes_gcm_8x_dec

	# load 2 more round keys (v13, v14)
	lxv	13, 0xd0(6)
	lxv	14, 0xe0(6)
	cmpdi	9, 14
	beq	Loop_aes_gcm_8x_dec

	b	aes_gcm_out

.align 5
Loop_aes_gcm_8x_dec:
	mr	14, 3
	mr	9, 4

	#
	# check partial block
	#
Continue_partial_check_dec:
	ld	15, 56(7)
	cmpdi	15, 0
	beq	Continue_dec
	bgt	Final_block_dec
	cmpdi	15, 16
	blt	Final_block_dec

Continue_dec:
	# n blocks
	li	10, 128
	divdu	10, 12, 10		# n 128-byte blocks
	cmpdi	10, 0
	beq	Loop_last_block_dec

	vaddudm	30, 30, 31		# IV + counter
	vxor	16, 30, 29
	vaddudm	30, 30, 31
	vxor	17, 30, 29
	vaddudm	30, 30, 31
	vxor	18, 30, 29
	vaddudm	30, 30, 31
	vxor	19, 30, 29
	vaddudm	30, 30, 31
	vxor	20, 30, 29
	vaddudm	30, 30, 31
	vxor	21, 30, 29
	vaddudm	30, 30, 31
	vxor	22, 30, 29

	mtctr	10

	li	15, 16
	li	16, 32
	li	17, 48
	li	18, 64
	li	19, 80
	li	20, 96
	li	21, 112

	lwz	10, 240(6)

Loop_8x_block_dec:

	lxvb16x	15, 0, 14		# load block
	lxvb16x	16, 15, 14		# load block
	lxvb16x	17, 16, 14		# load block
	lxvb16x	18, 17, 14		# load block
	lxvb16x	19, 18, 14		# load block
	lxvb16x	20, 19, 14		# load block
	lxvb16x	21, 20, 14		# load block
	lxvb16x	22, 21, 14		# load block
	addi	14, 14, 128

	Loop_aes_middle8x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_ghash_dec

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_ghash_dec

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_ghash_dec
	b	aes_gcm_out

Do_next_ghash_dec:

	#
	# last round
	vcipherlast	15, 15, 23
	vcipherlast	16, 16, 23

	xxlxor	47, 47, 15
	stxvb16x	47, 0, 9	# store output
	xxlxor	48, 48, 16
	stxvb16x	48, 15, 9	# store output

	vcipherlast	17, 17, 23
	vcipherlast	18, 18, 23

	xxlxor	49, 49, 17
	stxvb16x	49, 16, 9	# store output
	xxlxor	50, 50, 18
	stxvb16x	50, 17, 9	# store output

	vcipherlast	19, 19, 23
	vcipherlast	20, 20, 23

	xxlxor	51, 51, 19
	stxvb16x	51, 18, 9	# store output
	xxlxor	52, 52, 20
	stxvb16x	52, 19, 9	# store output

	vcipherlast	21, 21, 23
	vcipherlast	22, 22, 23

	xxlxor	53, 53, 21
	stxvb16x	53, 20, 9	# store output
	xxlxor	54, 54, 22
	stxvb16x	54, 21, 9	# store output

	addi	9, 9, 128

	xxlor	15+32, 15, 15		# copy ciphertext blocks for GHASH
	xxlor	16+32, 16, 16
	xxlor	17+32, 17, 17
	xxlor	18+32, 18, 18
	xxlor	19+32, 19, 19
	xxlor	20+32, 20, 20
	xxlor	21+32, 21, 21
	xxlor	22+32, 22, 22

	# ghash here
	ppc_aes_gcm_ghash2_4x

	xxlor	27+32, 0, 0
	vaddudm	30, 30, 31		# IV + counter
	vmr	29, 30
	vxor	15, 30, 27		# add round key
	vaddudm	30, 30, 31
	vxor	16, 30, 27
	vaddudm	30, 30, 31
	vxor	17, 30, 27
	vaddudm	30, 30, 31
	vxor	18, 30, 27
	vaddudm	30, 30, 31
	vxor	19, 30, 27
	vaddudm	30, 30, 31
	vxor	20, 30, 27
	vaddudm	30, 30, 31
	vxor	21, 30, 27
	vaddudm	30, 30, 31
	vxor	22, 30, 27

	addi	12, 12, -128
	addi	11, 11, 128

	bdnz	Loop_8x_block_dec

	vmr	30, 29
	stxvb16x	30+32, 0, 7	# update IV

Loop_last_block_dec:
	cmpdi	12, 0
	beq	aes_gcm_out

	# loop last few blocks
	li	10, 16
	divdu	10, 12, 10

	mtctr	10

	lwz	10, 240(6)

	cmpdi	12, 16
	blt	Final_block_dec

Next_rem_block_dec:
	lxvb16x	15, 0, 14		# load block

	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_1x_dec

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_1x_dec

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_1x_dec

Do_next_1x_dec:
	vcipherlast	15, 15, 23

	xxlxor	47, 47, 15
	stxvb16x	47, 0, 9	# store output
	addi	14, 14, 16
	addi	9, 9, 16

	xxlor	28+32, 15, 15		# use the ciphertext block for GHASH
	#vmr	28, 15
	ppc_update_hash_1x

	addi	12, 12, -16
	addi	11, 11, 16
	xxlor	19+32, 0, 0
	vaddudm	30, 30, 31		# IV + counter
	vxor	15, 30, 19		# add round key

	bdnz	Next_rem_block_dec

	li	15, 0
	std	15, 56(7)		# clear partial block flag
	stxvb16x	30+32, 0, 7	# update IV
	cmpdi	12, 0
	beq	aes_gcm_out

Final_block_dec:
	lwz	10, 240(6)
	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_final_1x_dec

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_final_1x_dec

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_final_1x_dec

Do_final_1x_dec:
	vcipherlast	15, 15, 23

	# check partial block
	li	21, 1			# decrypt
	ld	15, 56(7)		# partial?
	cmpdi	15, 0
	beq	Normal_block_dec
	bl	Do_partial_block
	cmpdi	12, 0
	ble	aes_gcm_out

	b	Continue_partial_check_dec

Normal_block_dec:
	lxvb16x	15, 0, 14		# load last block
	xxlxor	47, 47, 15

	# create partial block mask
	li	15, 16
	sub	15, 15, 12		# index to the mask

	vspltisb 16, -1			# first 16 bytes - 0xffff...ff
	vspltisb 17, 0			# second 16 bytes - 0x0000...00
	li	10, 192
	stvx	16, 10, 1
	addi	10, 10, 16
	stvx	17, 10, 1

	addi	10, 1, 192
	lxvb16x	16, 15, 10		# load partial block mask
	xxland	47, 47, 16

	xxland	32+28, 15, 16		# masked ciphertext for GHASH
	#vmr	28, 15
	ppc_update_hash_1x

	# store only the remaining bytes
	bl	Write_partial_block

	stxvb16x	30+32, 0, 7	# update IV
	std	12, 56(7)		# save partial block length
	li	16, 16

	stxvb16x	32, 0, 8	# write out Xi
	stxvb16x	32, 16, 8	# write out Xi
	b	aes_gcm_out