/* SPDX-License-Identifier: GPL-2.0-or-later */
 #
 # Accelerated AES-GCM stitched implementation for ppc64le.
 #
 # Copyright 2022- IBM Inc. All rights reserved
 #
 #===================================================================================
 # Written by Danny Tsen <dtsen@linux.ibm.com>
 #
 # GHASH is based on the Karatsuba multiplication method.
 #
 #    Xi xor X1
 #
 #    X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
 #      (X1.h * H^4.h + X1.l * H^4.l + X1 * H^4) +
 #      (X2.h * H^3.h + X2.l * H^3.l + X2 * H^3) +
 #      (X3.h * H^2.h + X3.l * H^2.l + X3 * H^2) +
 #      (X4.h * H.h + X4.l * H.l + X4 * H)
 #
 # Xi = v0
 # H Poly = v2
 # Hash keys = v3 - v14
 #     (  H.l,   H,   H.h)
 #     (H^2.l, H^2, H^2.h)
 #     (H^3.l, H^3, H^3.h)
 #     (H^4.l, H^4, H^4.h)
 #
 # v30 is IV
 # v31 - counter 1
 #
 # For AES:
 # vs0 - vs14 for round keys
 # v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
 #
 # This implementation uses a stitched AES-GCM approach to improve
 # overall performance.  AES is implemented with 8x blocks and GHASH
 # uses two 4x block groups.
 #
 # ===================================================================================
 #
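 # For a single block, the 128x128-bit carry-less product H * X is
 # assembled from three vpmsumd results,
 #     L  = H.l * X.l
 #     M  = H.l * X.h + H.h * X.l
 #     Hi = H.h * X.h
 # so that H * X = (Hi << 128) + (M << 64) + L.  The middle term M is
 # split into mL/mH with vsldoi and folded into L and Hi, and the
 # 256-bit sum is reduced modulo the GHASH polynomial
 # x^128 + x^7 + x^2 + x + 1 by two vpmsumd multiplies with the
 # pre-computed H Poly constant kept in v2.
 #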

#include <asm/ppc_asm.h>
#include <linux/linkage.h>

.machine "any"
.text

 # 4x loops
 # v15 - v18 - input states
 # vs1 - vs9 - round keys
 #
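 # Note: vcipher operates on VRs only, while the round keys live in
 # VSRs.  "xxlor N+32, K, K" copies vs<K> into VSR 32+N, which
 # aliases VR N, staging each key where vcipher can reach it.
 #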
.macro Loop_aes_middle4x
        xxlor 19+32, 1, 1
        xxlor 20+32, 2, 2
        xxlor 21+32, 3, 3
        xxlor 22+32, 4, 4

        vcipher 15, 15, 19
        vcipher 16, 16, 19
        vcipher 17, 17, 19
        vcipher 18, 18, 19

        vcipher 15, 15, 20
        vcipher 16, 16, 20
        vcipher 17, 17, 20
        vcipher 18, 18, 20

        vcipher 15, 15, 21
        vcipher 16, 16, 21
        vcipher 17, 17, 21
        vcipher 18, 18, 21

        vcipher 15, 15, 22
        vcipher 16, 16, 22
        vcipher 17, 17, 22
        vcipher 18, 18, 22

        xxlor 19+32, 5, 5
        xxlor 20+32, 6, 6
        xxlor 21+32, 7, 7
        xxlor 22+32, 8, 8

        vcipher 15, 15, 19
        vcipher 16, 16, 19
        vcipher 17, 17, 19
        vcipher 18, 18, 19

        vcipher 15, 15, 20
        vcipher 16, 16, 20
        vcipher 17, 17, 20
        vcipher 18, 18, 20

        vcipher 15, 15, 21
        vcipher 16, 16, 21
        vcipher 17, 17, 21
        vcipher 18, 18, 21

        vcipher 15, 15, 22
        vcipher 16, 16, 22
        vcipher 17, 17, 22
        vcipher 18, 18, 22

        xxlor 23+32, 9, 9
        vcipher 15, 15, 23
        vcipher 16, 16, 23
        vcipher 17, 17, 23
        vcipher 18, 18, 23
.endm

 # 8x loops
 # v15 - v22 - input states
 # vs1 - vs9 - round keys
 #
.macro Loop_aes_middle8x
        xxlor 23+32, 1, 1
        xxlor 24+32, 2, 2
        xxlor 25+32, 3, 3
        xxlor 26+32, 4, 4

        vcipher 15, 15, 23
        vcipher 16, 16, 23
        vcipher 17, 17, 23
        vcipher 18, 18, 23
        vcipher 19, 19, 23
        vcipher 20, 20, 23
        vcipher 21, 21, 23
        vcipher 22, 22, 23

        vcipher 15, 15, 24
        vcipher 16, 16, 24
        vcipher 17, 17, 24
        vcipher 18, 18, 24
        vcipher 19, 19, 24
        vcipher 20, 20, 24
        vcipher 21, 21, 24
        vcipher 22, 22, 24

        vcipher 15, 15, 25
        vcipher 16, 16, 25
        vcipher 17, 17, 25
        vcipher 18, 18, 25
        vcipher 19, 19, 25
        vcipher 20, 20, 25
        vcipher 21, 21, 25
        vcipher 22, 22, 25

        vcipher 15, 15, 26
        vcipher 16, 16, 26
        vcipher 17, 17, 26
        vcipher 18, 18, 26
        vcipher 19, 19, 26
        vcipher 20, 20, 26
        vcipher 21, 21, 26
        vcipher 22, 22, 26

        xxlor 23+32, 5, 5
        xxlor 24+32, 6, 6
        xxlor 25+32, 7, 7
        xxlor 26+32, 8, 8

        vcipher 15, 15, 23
        vcipher 16, 16, 23
        vcipher 17, 17, 23
        vcipher 18, 18, 23
        vcipher 19, 19, 23
        vcipher 20, 20, 23
        vcipher 21, 21, 23
        vcipher 22, 22, 23

        vcipher 15, 15, 24
        vcipher 16, 16, 24
        vcipher 17, 17, 24
        vcipher 18, 18, 24
        vcipher 19, 19, 24
        vcipher 20, 20, 24
        vcipher 21, 21, 24
        vcipher 22, 22, 24

        vcipher 15, 15, 25
        vcipher 16, 16, 25
        vcipher 17, 17, 25
        vcipher 18, 18, 25
        vcipher 19, 19, 25
        vcipher 20, 20, 25
        vcipher 21, 21, 25
        vcipher 22, 22, 25

        vcipher 15, 15, 26
        vcipher 16, 16, 26
        vcipher 17, 17, 26
        vcipher 18, 18, 26
        vcipher 19, 19, 26
        vcipher 20, 20, 26
        vcipher 21, 21, 26
        vcipher 22, 22, 26

        xxlor 23+32, 9, 9
        vcipher 15, 15, 23
        vcipher 16, 16, 23
        vcipher 17, 17, 23
        vcipher 18, 18, 23
        vcipher 19, 19, 23
        vcipher 20, 20, 23
        vcipher 21, 21, 23
        vcipher 22, 22, 23
.endm

.macro Loop_aes_middle_1x
        xxlor 19+32, 1, 1
        xxlor 20+32, 2, 2
        xxlor 21+32, 3, 3
        xxlor 22+32, 4, 4

        vcipher 15, 15, 19
        vcipher 15, 15, 20
        vcipher 15, 15, 21
        vcipher 15, 15, 22

        xxlor 19+32, 5, 5
        xxlor 20+32, 6, 6
        xxlor 21+32, 7, 7
        xxlor 22+32, 8, 8

        vcipher 15, 15, 19
        vcipher 15, 15, 20
        vcipher 15, 15, 21
        vcipher 15, 15, 22

        xxlor 19+32, 9, 9
        vcipher 15, 15, 19
.endm

 #
 # Compute 4x hash values based on the Karatsuba method.
 #
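 # The four per-block products are accumulated term-wise: the low
 # halves (L) are gathered in v23, then the middle terms (M) in v24,
 # then the high halves in v24 again.  Two reduction multiplies by
 # the H Poly constant (v2) fold the 256-bit sum back into a 128-bit
 # hash.
 #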
.macro ppc_aes_gcm_ghash
        vxor 15, 15, 0

        vpmsumd 23, 12, 15 # H^4.l * X1.l
        vpmsumd 24, 9, 16
        vpmsumd 25, 6, 17
        vpmsumd 26, 3, 18

        vxor 23, 23, 24
        vxor 23, 23, 25
        vxor 23, 23, 26 # L

        vpmsumd 24, 13, 15 # H^4.l * X1.h + H^4.h * X1.l
        vpmsumd 25, 10, 16 # H^3.l * X2.h + H^3.h * X2.l
        vpmsumd 26, 7, 17
        vpmsumd 27, 4, 18

        vxor 24, 24, 25
        vxor 24, 24, 26
        vxor 24, 24, 27 # M

        # sum hash and reduction with H Poly
        vpmsumd 28, 23, 2 # reduction

        vxor 29, 29, 29
        vsldoi 26, 24, 29, 8 # mL
        vsldoi 29, 29, 24, 8 # mH
        vxor 23, 23, 26 # mL + L

        vsldoi 23, 23, 23, 8 # swap
        vxor 23, 23, 28

        vpmsumd 24, 14, 15 # H^4.h * X1.h
        vpmsumd 25, 11, 16
        vpmsumd 26, 8, 17
        vpmsumd 27, 5, 18

        vxor 24, 24, 25
        vxor 24, 24, 26
        vxor 24, 24, 27

        vxor 24, 24, 29

        # sum hash and reduction with H Poly
        vsldoi 27, 23, 23, 8 # swap
        vpmsumd 23, 23, 2
        vxor 27, 27, 24
        vxor 23, 23, 27

        xxlor 32, 23+32, 23+32 # update hash

.endm

 #
 # Combine two 4x GHASH computations
 # v15 - v22 - input blocks
 #
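 # The result of the first 4x pass (v27) is folded into the fifth
 # block (v19) before the second 4x pass, chaining the two halves so
 # that all eight blocks contribute to one hash update.
 #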
.macro ppc_aes_gcm_ghash2_4x
        # first 4x hash
        vxor 15, 15, 0 # Xi + X

        vpmsumd 23, 12, 15 # H^4.l * X1.l
        vpmsumd 24, 9, 16
        vpmsumd 25, 6, 17
        vpmsumd 26, 3, 18

        vxor 23, 23, 24
        vxor 23, 23, 25
        vxor 23, 23, 26 # L

        vpmsumd 24, 13, 15 # H^4.l * X1.h + H^4.h * X1.l
        vpmsumd 25, 10, 16 # H^3.l * X2.h + H^3.h * X2.l
        vpmsumd 26, 7, 17
        vpmsumd 27, 4, 18

        vxor 24, 24, 25
        vxor 24, 24, 26

        # sum hash and reduction with H Poly
        vpmsumd 28, 23, 2 # reduction

        vxor 29, 29, 29

        vxor 24, 24, 27 # M
        vsldoi 26, 24, 29, 8 # mL
        vsldoi 29, 29, 24, 8 # mH
        vxor 23, 23, 26 # mL + L

        vsldoi 23, 23, 23, 8 # swap
        vxor 23, 23, 28

        vpmsumd 24, 14, 15 # H^4.h * X1.h
        vpmsumd 25, 11, 16
        vpmsumd 26, 8, 17
        vpmsumd 27, 5, 18

        vxor 24, 24, 25
        vxor 24, 24, 26
        vxor 24, 24, 27 # H

        vxor 24, 24, 29 # H + mH

        # sum hash and reduction with H Poly
        vsldoi 27, 23, 23, 8 # swap
        vpmsumd 23, 23, 2
        vxor 27, 27, 24
        vxor 27, 23, 27 # 1st Xi

        # 2nd 4x hash
        vpmsumd 24, 9, 20
        vpmsumd 25, 6, 21
        vpmsumd 26, 3, 22
        vxor 19, 19, 27 # Xi + X
        vpmsumd 23, 12, 19 # H^4.l * X5.l

        vxor 23, 23, 24
        vxor 23, 23, 25
        vxor 23, 23, 26 # L

        vpmsumd 24, 13, 19 # H^4.l * X5.h + H^4.h * X5.l
        vpmsumd 25, 10, 20 # H^3.l * X6.h + H^3.h * X6.l
        vpmsumd 26, 7, 21
        vpmsumd 27, 4, 22

        vxor 24, 24, 25
        vxor 24, 24, 26

        # sum hash and reduction with H Poly
        vpmsumd 28, 23, 2 # reduction

        vxor 29, 29, 29

        vxor 24, 24, 27 # M
        vsldoi 26, 24, 29, 8 # mL
        vsldoi 29, 29, 24, 8 # mH
        vxor 23, 23, 26 # mL + L

        vsldoi 23, 23, 23, 8 # swap
        vxor 23, 23, 28

        vpmsumd 24, 14, 19 # H^4.h * X5.h
        vpmsumd 25, 11, 20
        vpmsumd 26, 8, 21
        vpmsumd 27, 5, 22

        vxor 24, 24, 25
        vxor 24, 24, 26
        vxor 24, 24, 27 # H

        vxor 24, 24, 29 # H + mH

        # sum hash and reduction with H Poly
        vsldoi 27, 23, 23, 8 # swap
        vpmsumd 23, 23, 2
        vxor 27, 27, 24
        vxor 23, 23, 27

        xxlor 32, 23+32, 23+32 # update hash

.endm

 #
 # Compute and update a single hash value
 #
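 # Input block in v28, current hash in v0, hash key H in v3-v5
 # (H.l, H, H.h); the reduction mirrors the 4x macros above.
 #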
.macro ppc_update_hash_1x
        vxor 28, 28, 0

        vxor 19, 19, 19

        vpmsumd 22, 3, 28 # L
        vpmsumd 23, 4, 28 # M
        vpmsumd 24, 5, 28 # H

        vpmsumd 27, 22, 2 # reduction

        vsldoi 25, 23, 19, 8 # mL
        vsldoi 26, 19, 23, 8 # mH
        vxor 22, 22, 25 # L + mL
        vxor 24, 24, 26 # H + mH

        vsldoi 22, 22, 22, 8 # swap
        vxor 22, 22, 27

        vsldoi 20, 22, 22, 8 # swap
        vpmsumd 22, 22, 2 # reduction
        vxor 20, 20, 24
        vxor 22, 22, 20

        vmr 0, 22 # update hash

.endm

.macro SAVE_REGS
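        # 640-byte stack frame:
        #   112(1)-168(1): non-volatile GPRs r14-r21
        #   256(1)-432(1): non-volatile VRs v20-v31
        #   464(1)-592(1): VSRs vs14-vs22
        # LR is saved at 656(1), the LR save slot in the caller's frame.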
        stdu 1,-640(1)
        mflr 0

        std 14,112(1)
        std 15,120(1)
        std 16,128(1)
        std 17,136(1)
        std 18,144(1)
        std 19,152(1)
        std 20,160(1)
        std 21,168(1)
        li 9, 256
        stvx 20, 9, 1
        addi 9, 9, 16
        stvx 21, 9, 1
        addi 9, 9, 16
        stvx 22, 9, 1
        addi 9, 9, 16
        stvx 23, 9, 1
        addi 9, 9, 16
        stvx 24, 9, 1
        addi 9, 9, 16
        stvx 25, 9, 1
        addi 9, 9, 16
        stvx 26, 9, 1
        addi 9, 9, 16
        stvx 27, 9, 1
        addi 9, 9, 16
        stvx 28, 9, 1
        addi 9, 9, 16
        stvx 29, 9, 1
        addi 9, 9, 16
        stvx 30, 9, 1
        addi 9, 9, 16
        stvx 31, 9, 1
        stxv 14, 464(1)
        stxv 15, 480(1)
        stxv 16, 496(1)
        stxv 17, 512(1)
        stxv 18, 528(1)
        stxv 19, 544(1)
        stxv 20, 560(1)
        stxv 21, 576(1)
        stxv 22, 592(1)
        std 0, 656(1)
.endm

.macro RESTORE_REGS
        lxv 14, 464(1)
        lxv 15, 480(1)
        lxv 16, 496(1)
        lxv 17, 512(1)
        lxv 18, 528(1)
        lxv 19, 544(1)
        lxv 20, 560(1)
        lxv 21, 576(1)
        lxv 22, 592(1)
        li 9, 256
        lvx 20, 9, 1
        addi 9, 9, 16
        lvx 21, 9, 1
        addi 9, 9, 16
        lvx 22, 9, 1
        addi 9, 9, 16
        lvx 23, 9, 1
        addi 9, 9, 16
        lvx 24, 9, 1
        addi 9, 9, 16
        lvx 25, 9, 1
        addi 9, 9, 16
        lvx 26, 9, 1
        addi 9, 9, 16
        lvx 27, 9, 1
        addi 9, 9, 16
        lvx 28, 9, 1
        addi 9, 9, 16
        lvx 29, 9, 1
        addi 9, 9, 16
        lvx 30, 9, 1
        addi 9, 9, 16
        lvx 31, 9, 1

        ld 0, 656(1)
        ld 14,112(1)
        ld 15,120(1)
        ld 16,128(1)
        ld 17,136(1)
        ld 18,144(1)
        ld 19,152(1)
        ld 20,160(1)
        ld 21,168(1)

        mtlr 0
        addi 1, 1, 640
.endm

.macro LOAD_HASH_TABLE
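        # gcm_table layout at r8 (16-byte slots):
        #   0: Xi,  32: H Poly,  48/64/80: H.l/H/H.h,
        #   96/112/128: H^2,  144/160/176: H^3,  192/208/224: H^4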
        # Load Xi
        lxvb16x 32, 0, 8 # load Xi

        # load hash keys - h, h^2, h^3, h^4
        li 10, 32
        lxvd2x 2+32, 10, 8 # H Poly
        li 10, 48
        lxvd2x 3+32, 10, 8 # Hl
        li 10, 64
        lxvd2x 4+32, 10, 8 # H
        li 10, 80
        lxvd2x 5+32, 10, 8 # Hh

        li 10, 96
        lxvd2x 6+32, 10, 8 # H^2l
        li 10, 112
        lxvd2x 7+32, 10, 8 # H^2
        li 10, 128
        lxvd2x 8+32, 10, 8 # H^2h

        li 10, 144
        lxvd2x 9+32, 10, 8 # H^3l
        li 10, 160
        lxvd2x 10+32, 10, 8 # H^3
        li 10, 176
        lxvd2x 11+32, 10, 8 # H^3h

        li 10, 192
        lxvd2x 12+32, 10, 8 # H^4l
        li 10, 208
        lxvd2x 13+32, 10, 8 # H^4
        li 10, 224
        lxvd2x 14+32, 10, 8 # H^4h
.endm

 #
 # aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
 #                      const char *rk, unsigned char iv[16], void *Xip);
 #
 # r3 - inp
 # r4 - out
 # r5 - len
 # r6 - AES round keys
 # r7 - iv and other data
 # r8 - Xi, HPoly, hash keys
 #
 # rounds is at offset 240 in rk
 # Xi is at 0 in gcm_table (Xip).
 #
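 # Note: the iv buffer at r7 carries more than the 16-byte counter
 # block; judging by the accesses below, the doubleword at offset 56
 # holds the partial-block byte count carried between calls.
 #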
_GLOBAL(aes_p10_gcm_encrypt)
.align 5

        SAVE_REGS

        LOAD_HASH_TABLE

        # initialize ICB: GHASH( IV ), IV - r7
        lxvb16x 30+32, 0, 7 # load IV - v30

        mr 12, 5 # length
        li 11, 0 # block index

        # counter 1
        vxor 31, 31, 31
        vspltisb 22, 1
        vsldoi 31, 31, 22, 1 # counter 1
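        # vsldoi shifts one byte of the splatted 0x01 (v22) into the
        # zeroed v31, leaving v31 = 0x00...01; vaddudm with this value
        # steps the low doubleword of the big-endian counter block.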

        # load round key to VSR
        lxv 0, 0(6)
        lxv 1, 0x10(6)
        lxv 2, 0x20(6)
        lxv 3, 0x30(6)
        lxv 4, 0x40(6)
        lxv 5, 0x50(6)
        lxv 6, 0x60(6)
        lxv 7, 0x70(6)
        lxv 8, 0x80(6)
        lxv 9, 0x90(6)
        lxv 10, 0xa0(6)

        # load rounds - 10 (128), 12 (192), 14 (256)
        lwz 9, 240(6)

        #
        # vxor state, state, w # addroundkey
        xxlor 32+29, 0, 0
        vxor 15, 30, 29 # IV + round key - add round key 0

        cmpdi 9, 10
        beq Loop_aes_gcm_8x

        # load 2 more round keys (vs11, vs12)
        lxv 11, 0xb0(6)
        lxv 12, 0xc0(6)

        cmpdi 9, 12
        beq Loop_aes_gcm_8x

        # load 2 more round keys (vs13, vs14)
        lxv 13, 0xd0(6)
        lxv 14, 0xe0(6)
        cmpdi 9, 14
        beq Loop_aes_gcm_8x

        b aes_gcm_out

.align 5
Loop_aes_gcm_8x:
        mr 14, 3
        mr 9, 4

        #
        # check partial block
        #
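 # 56(7) holds the partial-block byte count (see the note above the
 # function); non-zero means a previous call left buffered bytes that
 # must be completed first.
 #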
Continue_partial_check:
        ld 15, 56(7)
        cmpdi 15, 0
        beq Continue
        bgt Final_block
        cmpdi 15, 16
        blt Final_block

Continue:
        # n blocks
        li 10, 128
        divdu 10, 12, 10 # n 128-byte blocks
        cmpdi 10, 0
        beq Loop_last_block

        vaddudm 30, 30, 31 # IV + counter
        vxor 16, 30, 29
        vaddudm 30, 30, 31
        vxor 17, 30, 29
        vaddudm 30, 30, 31
        vxor 18, 30, 29
        vaddudm 30, 30, 31
        vxor 19, 30, 29
        vaddudm 30, 30, 31
        vxor 20, 30, 29
        vaddudm 30, 30, 31
        vxor 21, 30, 29
        vaddudm 30, 30, 31
        vxor 22, 30, 29

        mtctr 10

        li 15, 16
        li 16, 32
        li 17, 48
        li 18, 64
        li 19, 80
        li 20, 96
        li 21, 112

        lwz 10, 240(6)

Loop_8x_block:

        lxvb16x 15, 0, 14 # load block
        lxvb16x 16, 15, 14 # load block
        lxvb16x 17, 16, 14 # load block
        lxvb16x 18, 17, 14 # load block
        lxvb16x 19, 18, 14 # load block
        lxvb16x 20, 19, 14 # load block
        lxvb16x 21, 20, 14 # load block
        lxvb16x 22, 21, 14 # load block
        addi 14, 14, 128
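        # The input blocks stay in vs15-vs22; the AES counter states in
        # v15-v22 alias vs47-vs54, so the xxlxor stores below XOR the
        # keystream directly with the input.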

        Loop_aes_middle8x

        xxlor 23+32, 10, 10

        cmpdi 10, 10
        beq Do_next_ghash

        # 192 bits
        xxlor 24+32, 11, 11

        vcipher 15, 15, 23
        vcipher 16, 16, 23
        vcipher 17, 17, 23
        vcipher 18, 18, 23
        vcipher 19, 19, 23
        vcipher 20, 20, 23
        vcipher 21, 21, 23
        vcipher 22, 22, 23

        vcipher 15, 15, 24
        vcipher 16, 16, 24
        vcipher 17, 17, 24
        vcipher 18, 18, 24
        vcipher 19, 19, 24
        vcipher 20, 20, 24
        vcipher 21, 21, 24
        vcipher 22, 22, 24

        xxlor 23+32, 12, 12

        cmpdi 10, 12
        beq Do_next_ghash

        # 256 bits
        xxlor 24+32, 13, 13

        vcipher 15, 15, 23
        vcipher 16, 16, 23
        vcipher 17, 17, 23
        vcipher 18, 18, 23
        vcipher 19, 19, 23
        vcipher 20, 20, 23
        vcipher 21, 21, 23
        vcipher 22, 22, 23

        vcipher 15, 15, 24
        vcipher 16, 16, 24
        vcipher 17, 17, 24
        vcipher 18, 18, 24
        vcipher 19, 19, 24
        vcipher 20, 20, 24
        vcipher 21, 21, 24
        vcipher 22, 22, 24

        xxlor 23+32, 14, 14

        cmpdi 10, 14
        beq Do_next_ghash
        b aes_gcm_out

Do_next_ghash:

        #
        # last round
        vcipherlast 15, 15, 23
        vcipherlast 16, 16, 23

        xxlxor 47, 47, 15
        stxvb16x 47, 0, 9 # store output
        xxlxor 48, 48, 16
        stxvb16x 48, 15, 9 # store output

        vcipherlast 17, 17, 23
        vcipherlast 18, 18, 23

        xxlxor 49, 49, 17
        stxvb16x 49, 16, 9 # store output
        xxlxor 50, 50, 18
        stxvb16x 50, 17, 9 # store output

        vcipherlast 19, 19, 23
        vcipherlast 20, 20, 23

        xxlxor 51, 51, 19
        stxvb16x 51, 18, 9 # store output
        xxlxor 52, 52, 20
        stxvb16x 52, 19, 9 # store output

        vcipherlast 21, 21, 23
        vcipherlast 22, 22, 23

        xxlxor 53, 53, 21
        stxvb16x 53, 20, 9 # store output
        xxlxor 54, 54, 22
        stxvb16x 54, 21, 9 # store output

        addi 9, 9, 128

        # ghash here
        ppc_aes_gcm_ghash2_4x

        xxlor 27+32, 0, 0
        vaddudm 30, 30, 31 # IV + counter
        vmr 29, 30
        vxor 15, 30, 27 # add round key
        vaddudm 30, 30, 31
        vxor 16, 30, 27
        vaddudm 30, 30, 31
        vxor 17, 30, 27
        vaddudm 30, 30, 31
        vxor 18, 30, 27
        vaddudm 30, 30, 31
        vxor 19, 30, 27
        vaddudm 30, 30, 31
        vxor 20, 30, 27
        vaddudm 30, 30, 31
        vxor 21, 30, 27
        vaddudm 30, 30, 31
        vxor 22, 30, 27

        addi 12, 12, -128
        addi 11, 11, 128

        bdnz Loop_8x_block

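        # v29 holds the first counter value that was prepared but never
        # consumed; make it the new IV.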
        vmr 30, 29
        stxvb16x 30+32, 0, 7 # update IV

Loop_last_block:
        cmpdi 12, 0
        beq aes_gcm_out

        # loop last few blocks
        li 10, 16
        divdu 10, 12, 10

        mtctr 10

        lwz 10, 240(6)

        cmpdi 12, 16
        blt Final_block

Next_rem_block:
        lxvb16x 15, 0, 14 # load block

        Loop_aes_middle_1x

        xxlor 23+32, 10, 10

        cmpdi 10, 10
        beq Do_next_1x

        # 192 bits
        xxlor 24+32, 11, 11

        vcipher 15, 15, 23
        vcipher 15, 15, 24

        xxlor 23+32, 12, 12

        cmpdi 10, 12
        beq Do_next_1x

        # 256 bits
        xxlor 24+32, 13, 13

        vcipher 15, 15, 23
        vcipher 15, 15, 24

        xxlor 23+32, 14, 14

        cmpdi 10, 14
        beq Do_next_1x

Do_next_1x:
        vcipherlast 15, 15, 23

        xxlxor 47, 47, 15
        stxvb16x 47, 0, 9 # store output
        addi 14, 14, 16
        addi 9, 9, 16

        vmr 28, 15
        ppc_update_hash_1x

        addi 12, 12, -16
        addi 11, 11, 16
        xxlor 19+32, 0, 0
        vaddudm 30, 30, 31 # IV + counter
        vxor 15, 30, 19 # add round key

        bdnz Next_rem_block

        li 15, 0
        std 15, 56(7) # clear partial flag
        stxvb16x 30+32, 0, 7 # update IV
        cmpdi 12, 0
        beq aes_gcm_out

Final_block:
        lwz 10, 240(6)
        Loop_aes_middle_1x

        xxlor 23+32, 10, 10

        cmpdi 10, 10
        beq Do_final_1x

        # 192 bits
        xxlor 24+32, 11, 11

        vcipher 15, 15, 23
        vcipher 15, 15, 24

        xxlor 23+32, 12, 12

        cmpdi 10, 12
        beq Do_final_1x

        # 256 bits
        xxlor 24+32, 13, 13

        vcipher 15, 15, 23
        vcipher 15, 15, 24

        xxlor 23+32, 14, 14

        cmpdi 10, 14
        beq Do_final_1x

Do_final_1x:
        vcipherlast 15, 15, 23

        # check partial block
        li 21, 0 # encrypt
        ld 15, 56(7) # partial bytes pending?
        cmpdi 15, 0
        beq Normal_block
        bl Do_partial_block

        cmpdi 12, 0
        ble aes_gcm_out

        b Continue_partial_check

Normal_block:
        lxvb16x 15, 0, 14 # load last block
        xxlxor 47, 47, 15

        # create partial block mask
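        # The 32 bytes at 192(r1) are 16 x 0xff then 16 x 0x00; loading
        # 16 bytes at offset (16 - len) yields len 0xff bytes followed
        # by zeros, trimming the block to the remaining length.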
        li 15, 16
        sub 15, 15, 12 # index to the mask

        vspltisb 16, -1 # first 16 bytes - 0xffff...ff
        vspltisb 17, 0 # second 16 bytes - 0x0000...00
        li 10, 192
        stvx 16, 10, 1
        addi 10, 10, 16
        stvx 17, 10, 1

        addi 10, 1, 192
        lxvb16x 16, 15, 10 # load partial block mask
        xxland 47, 47, 16

        vmr 28, 15
        ppc_update_hash_1x

        # should store only the remaining bytes.
        bl Write_partial_block

        stxvb16x 30+32, 0, 7 # update IV
        std 12, 56(7) # update partial count
        li 16, 16

        stxvb16x 32, 0, 8 # write out Xi
        stxvb16x 32, 16, 8 # write out Xi
        b aes_gcm_out

 #
 # Compute data mask
 #
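 # Builds a byte mask in \_mask: bytes [0, _start) are 0x00, bytes
 # [_start, _start + _end) are 0xff, and the remainder are 0x00.
 #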
.macro GEN_MASK _mask _start _end
        vspltisb 16, -1 # first 16 bytes - 0xffff...ff
        vspltisb 17, 0 # second 16 bytes - 0x0000...00
        li 10, 192
        stxvb16x 17+32, 10, 1
        add 10, 10, \_start
        stxvb16x 16+32, 10, 1
        add 10, 10, \_end
        stxvb16x 17+32, 10, 1

        addi 10, 1, 192
        lxvb16x \_mask, 0, 10 # load partial block mask
.endm

 #
 # Handle multiple partial blocks for encrypt and decrypt
 # operations.
 #
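 # In outline: r15 holds the byte count already buffered and r5 the
 # new input length; if together they exceed 16, mask a full block,
 # otherwise mask only the freshly arrived bytes.  The code appears
 # to keep a second running copy of Xi at 16(r8), which is folded in
 # and written back around the hash update.
 #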
SYM_FUNC_START_LOCAL(Do_partial_block)
        add 17, 15, 5
        cmpdi 17, 16
        bgt Big_block
        GEN_MASK 18, 15, 5
        b _Partial
SYM_FUNC_END(Do_partial_block)
Big_block:
        li 16, 16
        GEN_MASK 18, 15, 16

_Partial:
        lxvb16x 17+32, 0, 14 # load last block
        sldi 16, 15, 3
        mtvsrdd 32+16, 0, 16
        vsro 17, 17, 16
        xxlxor 47, 47, 17+32
        xxland 47, 47, 18

        vxor 0, 0, 0 # clear Xi
        vmr 28, 15

        cmpdi 21, 0 # encrypt (0) or decrypt (1)?
        beq Skip_decrypt
        xxland 32+28, 32+17, 18

Skip_decrypt:

        ppc_update_hash_1x

        li 16, 16
        lxvb16x 32+29, 16, 8
        vxor 0, 0, 29
        stxvb16x 32, 0, 8 # save Xi
        stxvb16x 32, 16, 8 # save Xi

        # store partial block
        # loop the rest of the stream if any
        sldi 16, 15, 3
        mtvsrdd 32+16, 0, 16
        vslo 15, 15, 16
        #stxvb16x 15+32, 0, 9 # last block

        li 16, 16
        sub 17, 16, 15 # 16 - partial

        add 16, 15, 5
        cmpdi 16, 16
        bgt Larger_16
        mr 17, 5
Larger_16:

        # write partial
        li 10, 192
        stxvb16x 15+32, 10, 1 # save current block

        addi 10, 9, -1
        addi 16, 1, 191
        mtctr 17 # move partial byte count

Write_last_partial:
        lbzu 18, 1(16)
        stbu 18, 1(10)
        bdnz Write_last_partial
        # done writing the partial bytes

        add 14, 14, 17
        add 9, 9, 17
        sub 12, 12, 17
        add 11, 11, 17

        add 15, 15, 5
        cmpdi 15, 16
        blt Save_partial

        vaddudm 30, 30, 31
        stxvb16x 30+32, 0, 7 # update IV
        xxlor 32+29, 0, 0
        vxor 15, 30, 29 # IV + round key - add round key 0
        li 15, 0
        std 15, 56(7) # partial done - clear
        b Partial_done
Save_partial:
        std 15, 56(7) # partial

Partial_done:
        blr

 #
 # Write partial block
 # r9 - output
 # r12 - remaining bytes
 # v15 - partial input data
 #
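 # Spills v15 to the stack at 192(r1), then copies r12 bytes to the
 # output one at a time with lbzu/stbu.
 #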
SYM_FUNC_START_LOCAL(Write_partial_block)
        li 10, 192
        stxvb16x 15+32, 10, 1 # last block

        addi 10, 9, -1
        addi 16, 1, 191

        mtctr 12 # remaining bytes
        li 15, 0

Write_last_byte:
        lbzu 14, 1(16)
        stbu 14, 1(10)
        bdnz Write_last_byte
        blr
SYM_FUNC_END(Write_partial_block)

aes_gcm_out:
        # out = state
        stxvb16x 32, 0, 8 # write out Xi
        add 3, 11, 12 # return count

        RESTORE_REGS
        blr

 #
 # 8x Decrypt
 #
_GLOBAL(aes_p10_gcm_decrypt)
.align 5

        SAVE_REGS

        LOAD_HASH_TABLE

        # initialize ICB: GHASH( IV ), IV - r7
        lxvb16x 30+32, 0, 7 # load IV - v30

        mr 12, 5 # length
        li 11, 0 # block index

        # counter 1
        vxor 31, 31, 31
        vspltisb 22, 1
        vsldoi 31, 31, 22, 1 # counter 1

        # load round key to VSR
        lxv 0, 0(6)
        lxv 1, 0x10(6)
        lxv 2, 0x20(6)
        lxv 3, 0x30(6)
        lxv 4, 0x40(6)
        lxv 5, 0x50(6)
        lxv 6, 0x60(6)
        lxv 7, 0x70(6)
        lxv 8, 0x80(6)
        lxv 9, 0x90(6)
        lxv 10, 0xa0(6)

        # load rounds - 10 (128), 12 (192), 14 (256)
        lwz 9, 240(6)

        #
        # vxor state, state, w # addroundkey
        xxlor 32+29, 0, 0
        vxor 15, 30, 29 # IV + round key - add round key 0

        cmpdi 9, 10
        beq Loop_aes_gcm_8x_dec

        # load 2 more round keys (vs11, vs12)
        lxv 11, 0xb0(6)
        lxv 12, 0xc0(6)

        cmpdi 9, 12
        beq Loop_aes_gcm_8x_dec

        # load 2 more round keys (vs13, vs14)
        lxv 13, 0xd0(6)
        lxv 14, 0xe0(6)
        cmpdi 9, 14
        beq Loop_aes_gcm_8x_dec

        b aes_gcm_out

.align 5
Loop_aes_gcm_8x_dec:
        mr 14, 3
        mr 9, 4

        #
        # check partial block
        #
Continue_partial_check_dec:
        ld 15, 56(7)
        cmpdi 15, 0
        beq Continue_dec
        bgt Final_block_dec
        cmpdi 15, 16
        blt Final_block_dec

Continue_dec:
        # n blocks
        li 10, 128
        divdu 10, 12, 10 # n 128-byte blocks
        cmpdi 10, 0
        beq Loop_last_block_dec

        vaddudm 30, 30, 31 # IV + counter
        vxor 16, 30, 29
        vaddudm 30, 30, 31
        vxor 17, 30, 29
        vaddudm 30, 30, 31
        vxor 18, 30, 29
        vaddudm 30, 30, 31
        vxor 19, 30, 29
        vaddudm 30, 30, 31
        vxor 20, 30, 29
        vaddudm 30, 30, 31
        vxor 21, 30, 29
        vaddudm 30, 30, 31
        vxor 22, 30, 29

        mtctr 10

        li 15, 16
        li 16, 32
        li 17, 48
        li 18, 64
        li 19, 80
        li 20, 96
        li 21, 112

        lwz 10, 240(6)

Loop_8x_block_dec:

        lxvb16x 15, 0, 14 # load block
        lxvb16x 16, 15, 14 # load block
        lxvb16x 17, 16, 14 # load block
        lxvb16x 18, 17, 14 # load block
        lxvb16x 19, 18, 14 # load block
        lxvb16x 20, 19, 14 # load block
        lxvb16x 21, 20, 14 # load block
        lxvb16x 22, 21, 14 # load block
        addi 14, 14, 128

        Loop_aes_middle8x

        xxlor 23+32, 10, 10

        cmpdi 10, 10
        beq Do_next_ghash_dec

        # 192 bits
        xxlor 24+32, 11, 11

        vcipher 15, 15, 23
        vcipher 16, 16, 23
        vcipher 17, 17, 23
        vcipher 18, 18, 23
        vcipher 19, 19, 23
        vcipher 20, 20, 23
        vcipher 21, 21, 23
        vcipher 22, 22, 23

        vcipher 15, 15, 24
        vcipher 16, 16, 24
        vcipher 17, 17, 24
        vcipher 18, 18, 24
        vcipher 19, 19, 24
        vcipher 20, 20, 24
        vcipher 21, 21, 24
        vcipher 22, 22, 24

        xxlor 23+32, 12, 12

        cmpdi 10, 12
        beq Do_next_ghash_dec

        # 256 bits
        xxlor 24+32, 13, 13

        vcipher 15, 15, 23
        vcipher 16, 16, 23
        vcipher 17, 17, 23
        vcipher 18, 18, 23
        vcipher 19, 19, 23
        vcipher 20, 20, 23
        vcipher 21, 21, 23
        vcipher 22, 22, 23

        vcipher 15, 15, 24
        vcipher 16, 16, 24
        vcipher 17, 17, 24
        vcipher 18, 18, 24
        vcipher 19, 19, 24
        vcipher 20, 20, 24
        vcipher 21, 21, 24
        vcipher 22, 22, 24

        xxlor 23+32, 14, 14

        cmpdi 10, 14
        beq Do_next_ghash_dec
        b aes_gcm_out

Do_next_ghash_dec:

        #
        # last round
        vcipherlast 15, 15, 23
        vcipherlast 16, 16, 23

        xxlxor 47, 47, 15
        stxvb16x 47, 0, 9 # store output
        xxlxor 48, 48, 16
        stxvb16x 48, 15, 9 # store output

        vcipherlast 17, 17, 23
        vcipherlast 18, 18, 23

        xxlxor 49, 49, 17
        stxvb16x 49, 16, 9 # store output
        xxlxor 50, 50, 18
        stxvb16x 50, 17, 9 # store output

        vcipherlast 19, 19, 23
        vcipherlast 20, 20, 23

        xxlxor 51, 51, 19
        stxvb16x 51, 18, 9 # store output
        xxlxor 52, 52, 20
        stxvb16x 52, 19, 9 # store output

        vcipherlast 21, 21, 23
        vcipherlast 22, 22, 23

        xxlxor 53, 53, 21
        stxvb16x 53, 20, 9 # store output
        xxlxor 54, 54, 22
        stxvb16x 54, 21, 9 # store output

        addi 9, 9, 128

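        # For decryption, GHASH runs over the ciphertext: copy the
        # loaded ciphertext blocks (vs15-vs22) into v15-v22 for hashing.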
        xxlor 15+32, 15, 15
        xxlor 16+32, 16, 16
        xxlor 17+32, 17, 17
        xxlor 18+32, 18, 18
        xxlor 19+32, 19, 19
        xxlor 20+32, 20, 20
        xxlor 21+32, 21, 21
        xxlor 22+32, 22, 22

        # ghash here
        ppc_aes_gcm_ghash2_4x

        xxlor 27+32, 0, 0
        vaddudm 30, 30, 31 # IV + counter
        vmr 29, 30
        vxor 15, 30, 27 # add round key
        vaddudm 30, 30, 31
        vxor 16, 30, 27
        vaddudm 30, 30, 31
        vxor 17, 30, 27
        vaddudm 30, 30, 31
        vxor 18, 30, 27
        vaddudm 30, 30, 31
        vxor 19, 30, 27
        vaddudm 30, 30, 31
        vxor 20, 30, 27
        vaddudm 30, 30, 31
        vxor 21, 30, 27
        vaddudm 30, 30, 31
        vxor 22, 30, 27

        addi 12, 12, -128
        addi 11, 11, 128

        bdnz Loop_8x_block_dec

        vmr 30, 29
        stxvb16x 30+32, 0, 7 # update IV

Loop_last_block_dec:
        cmpdi 12, 0
        beq aes_gcm_out

        # loop last few blocks
        li 10, 16
        divdu 10, 12, 10

        mtctr 10

        lwz 10, 240(6)

        cmpdi 12, 16
        blt Final_block_dec

Next_rem_block_dec:
        lxvb16x 15, 0, 14 # load block

        Loop_aes_middle_1x

        xxlor 23+32, 10, 10

        cmpdi 10, 10
        beq Do_next_1x_dec

        # 192 bits
        xxlor 24+32, 11, 11

        vcipher 15, 15, 23
        vcipher 15, 15, 24

        xxlor 23+32, 12, 12

        cmpdi 10, 12
        beq Do_next_1x_dec

        # 256 bits
        xxlor 24+32, 13, 13

        vcipher 15, 15, 23
        vcipher 15, 15, 24

        xxlor 23+32, 14, 14

        cmpdi 10, 14
        beq Do_next_1x_dec

Do_next_1x_dec:
        vcipherlast 15, 15, 23

        xxlxor 47, 47, 15
        stxvb16x 47, 0, 9 # store output
        addi 14, 14, 16
        addi 9, 9, 16

        xxlor 28+32, 15, 15
        #vmr 28, 15
        ppc_update_hash_1x

        addi 12, 12, -16
        addi 11, 11, 16
        xxlor 19+32, 0, 0
        vaddudm 30, 30, 31 # IV + counter
        vxor 15, 30, 19 # add round key

        bdnz Next_rem_block_dec

        li 15, 0
        std 15, 56(7) # clear partial flag
        stxvb16x 30+32, 0, 7 # update IV
        cmpdi 12, 0
        beq aes_gcm_out

Final_block_dec:
        lwz 10, 240(6)
        Loop_aes_middle_1x

        xxlor 23+32, 10, 10

        cmpdi 10, 10
        beq Do_final_1x_dec

        # 192 bits
        xxlor 24+32, 11, 11

        vcipher 15, 15, 23
        vcipher 15, 15, 24

        xxlor 23+32, 12, 12

        cmpdi 10, 12
        beq Do_final_1x_dec

        # 256 bits
        xxlor 24+32, 13, 13

        vcipher 15, 15, 23
        vcipher 15, 15, 24

        xxlor 23+32, 14, 14

        cmpdi 10, 14
        beq Do_final_1x_dec

Do_final_1x_dec:
        vcipherlast 15, 15, 23

        # check partial block
        li 21, 1 # decrypt
        ld 15, 56(7) # partial bytes pending?
        cmpdi 15, 0
        beq Normal_block_dec
        bl Do_partial_block
        cmpdi 12, 0
        ble aes_gcm_out

        b Continue_partial_check_dec

Normal_block_dec:
        lxvb16x 15, 0, 14 # load last block
        xxlxor 47, 47, 15

        # create partial block mask
        li 15, 16
        sub 15, 15, 12 # index to the mask

        vspltisb 16, -1 # first 16 bytes - 0xffff...ff
        vspltisb 17, 0 # second 16 bytes - 0x0000...00
        li 10, 192
        stvx 16, 10, 1
        addi 10, 10, 16
        stvx 17, 10, 1

        addi 10, 1, 192
        lxvb16x 16, 15, 10 # load partial block mask
        xxland 47, 47, 16

        xxland 32+28, 15, 16
        #vmr 28, 15
        ppc_update_hash_1x

        # should store only the remaining bytes.
        bl Write_partial_block

        stxvb16x 30+32, 0, 7 # update IV
        std 12, 56(7) # update partial count
        li 16, 16

        stxvb16x 32, 0, 8 # write out Xi
        stxvb16x 32, 16, 8 # write out Xi
        b aes_gcm_out