1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * arch/x86_64/lib/csum-partial.c |
4 | * |
5 | * This file contains network checksum routines that are better done |
6 | * in an architecture-specific manner due to speed. |
7 | */ |
8 | |
9 | #include <linux/compiler.h> |
10 | #include <linux/export.h> |
11 | #include <asm/checksum.h> |
12 | #include <asm/word-at-a-time.h> |
13 | |
14 | static inline __wsum csum_finalize_sum(u64 temp64) |
15 | { |
16 | return (__force __wsum)((temp64 + ror64(word: temp64, shift: 32)) >> 32); |
17 | } |
18 | |
/*
 * Accumulate five consecutive 64-bit words (40 bytes) from m onto sum.
 *
 * The first word is added with addq and the remaining four with adcq, so
 * the carry of each addition feeds the next one.  The closing "adcq $0"
 * adds the carry left by the last word back into the result.
 *
 * Returns the updated 64-bit accumulator.
 */
static inline unsigned long update_csum_40b(unsigned long sum, const unsigned long m[5])
{
	unsigned long acc = sum;

	__asm__("addq %[w0],%[acc]\n\t"
		"adcq %[w1],%[acc]\n\t"
		"adcq %[w2],%[acc]\n\t"
		"adcq %[w3],%[acc]\n\t"
		"adcq %[w4],%[acc]\n\t"
		"adcq $0,%[acc]"
		: [acc] "+r" (acc)
		: [w0] "m" (m[0]), [w1] "m" (m[1]), [w2] "m" (m[2]),
		  [w3] "m" (m[3]), [w4] "m" (m[4]));

	return acc;
}
32 | |
33 | /* |
34 | * Do a checksum on an arbitrary memory area. |
35 | * Returns a 32bit checksum. |
36 | * |
37 | * This isn't as time critical as it used to be because many NICs |
38 | * do hardware checksumming these days. |
39 | * |
40 | * Still, with CHECKSUM_COMPLETE this is called to compute |
41 | * checksums on IPv6 headers (40 bytes) and other small parts. |
42 | * it's best to have buff aligned on a 64-bit boundary |
43 | */ |
44 | __wsum csum_partial(const void *buff, int len, __wsum sum) |
45 | { |
46 | u64 temp64 = (__force u64)sum; |
47 | |
48 | /* Do two 40-byte chunks in parallel to get better ILP */ |
49 | if (likely(len >= 80)) { |
50 | u64 temp64_2 = 0; |
51 | do { |
52 | temp64 = update_csum_40b(sum: temp64, m: buff); |
53 | temp64_2 = update_csum_40b(sum: temp64_2, m: buff + 40); |
54 | buff += 80; |
55 | len -= 80; |
56 | } while (len >= 80); |
57 | |
58 | asm("addq %1,%0\n\t" |
59 | "adcq $0,%0" |
60 | :"+r" (temp64): "r" (temp64_2)); |
61 | } |
62 | |
63 | /* |
64 | * len == 40 is the hot case due to IPv6 headers, so return |
65 | * early for that exact case without checking the tail bytes. |
66 | */ |
67 | if (len >= 40) { |
68 | temp64 = update_csum_40b(sum: temp64, m: buff); |
69 | len -= 40; |
70 | if (!len) |
71 | return csum_finalize_sum(temp64); |
72 | buff += 40; |
73 | } |
74 | |
75 | if (len & 32) { |
76 | asm("addq 0*8(%[src]),%[res]\n\t" |
77 | "adcq 1*8(%[src]),%[res]\n\t" |
78 | "adcq 2*8(%[src]),%[res]\n\t" |
79 | "adcq 3*8(%[src]),%[res]\n\t" |
80 | "adcq $0,%[res]" |
81 | : [res] "+r" (temp64) |
82 | : [src] "r" (buff), "m" (*(const char(*)[32])buff)); |
83 | buff += 32; |
84 | } |
85 | if (len & 16) { |
86 | asm("addq 0*8(%[src]),%[res]\n\t" |
87 | "adcq 1*8(%[src]),%[res]\n\t" |
88 | "adcq $0,%[res]" |
89 | : [res] "+r" (temp64) |
90 | : [src] "r" (buff), "m" (*(const char(*)[16])buff)); |
91 | buff += 16; |
92 | } |
93 | if (len & 8) { |
94 | asm("addq 0*8(%[src]),%[res]\n\t" |
95 | "adcq $0,%[res]" |
96 | : [res] "+r" (temp64) |
97 | : [src] "r" (buff), "m" (*(const char(*)[8])buff)); |
98 | buff += 8; |
99 | } |
100 | if (len & 7) { |
101 | unsigned int shift = (-len << 3) & 63; |
102 | unsigned long trail; |
103 | |
104 | trail = (load_unaligned_zeropad(addr: buff) << shift) >> shift; |
105 | |
106 | asm("addq %[trail],%[res]\n\t" |
107 | "adcq $0,%[res]" |
108 | : [res] "+r" (temp64) |
109 | : [trail] "r" (trail)); |
110 | } |
111 | return csum_finalize_sum(temp64); |
112 | } |
113 | EXPORT_SYMBOL(csum_partial); |
114 | |
115 | /* |
116 | * this routine is used for miscellaneous IP-like checksums, mainly |
117 | * in icmp.c |
118 | */ |
119 | __sum16 ip_compute_csum(const void *buff, int len) |
120 | { |
121 | return csum_fold(sum: csum_partial(buff, len, 0)); |
122 | } |
123 | EXPORT_SYMBOL(ip_compute_csum); |
124 | |