/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */
9
10#include <linux/linkage.h>
11#include <linux/cfi_types.h>
12
/*
 * Register roles for nh_neon() and the _nh_stride macro.
 *
 * x0-x3 are the four integer arguments in AAPCS64 order.
 */
	KEY		.req	x0	// pointer to the next unread key words
	MESSAGE		.req	x1	// pointer to the next unread message bytes
	MESSAGE_LEN	.req	x2	// byte counter driving the loops
	HASH		.req	x3	// output buffer for the four pass sums

	/* One 2x64-bit accumulator per NH pass. */
	PASS0_SUMS	.req	v0
	PASS1_SUMS	.req	v1
	PASS2_SUMS	.req	v2
	PASS3_SUMS	.req	v3

	/*
	 * Sliding window of four 16-byte key strides.  K0/K1 must stay in
	 * consecutive registers: they are filled by a single two-register ld1.
	 */
	K0		.req	v4
	K1		.req	v5
	K2		.req	v6
	K3		.req	v7

	/*
	 * Scratch registers.  These live in v16-v23, which AAPCS64 treats as
	 * caller-saved, so the routine does not clobber any callee-saved SIMD
	 * state (the low halves of v8-v15) and needs no save/restore.
	 * T0/T1 must stay in consecutive registers: the final st1 stores them
	 * as a two-register list.
	 */
	T0		.req	v16
	T1		.req	v17
	T2		.req	v18
	T3		.req	v19
	T4		.req	v20
	T5		.req	v21
	T6		.req	v22
	T7		.req	v23
34
/*
 * _nh_stride - consume one 16-byte message stride for all four passes.
 *
 * \k0, \k1, \k2: registers already holding the key strides used by
 *		  passes 0, 1 and 2 for this message stride.
 * \k3:		  register that receives the next 16 bytes of key, loaded
 *		  here and used by pass 3.
 *
 * Post-increments MESSAGE and KEY by 16 bytes each, accumulates into
 * PASS0_SUMS..PASS3_SUMS, and overwrites the scratch registers T0-T7.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Fetch 16 message bytes, then the 16 key bytes that refill \k3
	ld1		{T3.16b}, [MESSAGE], #16
	ld1		{\k3\().4s}, [KEY], #16

	// Per pass: message words + key words, as four 32-bit lanes.
	// T3 doubles as the message register, so the pass that overwrites
	// it has to be the last of the four adds.
	add		T0.4s, T3.4s, \k0\().4s
	add		T1.4s, T3.4s, \k1\().4s
	add		T2.4s, T3.4s, \k2\().4s
	add		T3.4s, T3.4s, \k3\().4s

	// Bring the upper two words of every sum down into the low half of
	// a partner register so they line up with the lower two words ...
	mov		T4.d[0], T0.d[1]
	mov		T5.d[0], T1.d[1]
	mov		T6.d[0], T2.d[1]
	mov		T7.d[0], T3.d[1]

	// ... then form the widening 32x32 => 64 products word0 * word2 and
	// word1 * word3 and add them to the pass's two 64-bit accumulators.
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
.endm
59
/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * In:	KEY (x0), MESSAGE (x1), MESSAGE_LEN (x2), HASH (x3).
 * Out:	four 64-bit sums (32 bytes), one per pass, stored at HASH.
 *
 * The caller guarantees message_len % 16 == 0, so the message is a whole
 * number of 16-byte strides.  Reads 48 bytes of key up front plus 16 more
 * for every message stride processed.
 *
 * Overwrites KEY, MESSAGE, MESSAGE_LEN, the condition flags, the
 * accumulators, the K0-K3 key window and the T0-T7 scratch registers.
 */
SYM_TYPED_FUNC_START(nh_neon)

	// Prime the key window with three strides and clear the accumulators
	ld1		{K0.4s,K1.4s}, [KEY], #32
	movi		PASS0_SUMS.2d, #0
	movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	movi		PASS2_SUMS.2d, #0
	movi		PASS3_SUMS.2d, #0

	// Bias the counter by -64: it stays non-negative for as long as a
	// full group of four strides is still available.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	b.lt		.Lnh_tail

.Lnh_four_strides:
	// Four strides per iteration; the key window rotates by one register
	// each stride and is back in its starting arrangement after four.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	b.ge		.Lnh_four_strides

.Lnh_tail:
	// The -64 bias leaves the low six bits untouched, so masking yields
	// the leftover byte count: 0, 16, 32 or 48.
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	b.eq		.Lnh_finish
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	b.eq		.Lnh_finish
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	b.eq		.Lnh_finish
	_nh_stride	K2, K3, K0, K1

.Lnh_finish:
	// Fold each pass's two 64-bit lanes into one sum:
	// T0 = { pass0, pass1 }, T1 = { pass2, pass3 }; then write all four
	// sums to 'hash' in pass order.
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
SYM_FUNC_END(nh_neon)
105

/* Source: linux/arch/arm64/crypto/nh-neon-core.S */