/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 */
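/*
 * As a rough reference only (a hedged C sketch, not kernel API; it
 * assumes a big-endian machine, a 2-byte-aligned buff, and kernel
 * u8/u16/u32/u64 types), the asm below computes a 32-bit partial sum
 * whose 16-bit fold matches:
 *
 *	u32 csum_partial_sketch(const u8 *buff, int len, u32 sum)
 *	{
 *		u64 acc = sum;
 *		for (; len >= 2; len -= 2, buff += 2)
 *			acc += *(const u16 *)buff;	// big-endian halfwords
 *		if (len)				// trailing odd byte is the
 *			acc += (u32)*buff << 8;		// high byte of a halfword
 *		while (acc >> 32)			// end-around carry
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (u32)acc;
 *	}
 *
 * The asm instead reads whole words and carries with adde, which folds
 * to the same 16-bit checksum.
 */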
_GLOBAL(__csum_partial)
	subi	r3,r3,4
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
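	/* The loop below is unrolled by four and overlaps work: each
	   pass folds in the three words it just loaded plus r8 from the
	   previous pass, and leaves its own r8 for the next pass (or
	   for 23: after the last pass). */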
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8
3:	andi.	r0,r4,2
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff, while copying the block to dst.
 * If an access exception occurs it returns zero.
 *
 * csum_partial_copy_generic(src, dst, len)
 */
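/*
 * Behaviourally (again a hedged C sketch only, not the kernel
 * interface; same big-endian and kernel-type assumptions as above,
 * with the fault path shown as a comment):
 *
 *	u32 csum_copy_sketch(const u8 *src, u8 *dst, int len)
 *	{
 *		u64 acc = 0xffffffff;	// initial sum; see note below
 *		int i;
 *		// a faulting load or store here makes the real routine
 *		// return 0 via its exception table entries
 *		for (i = 0; i + 1 < len; i += 2) {
 *			u16 w = *(const u16 *)(src + i);
 *			*(u16 *)(dst + i) = w;
 *			acc += w;
 *		}
 *		if (len & 1) {
 *			dst[len - 1] = src[len - 1];
 *			acc += (u32)src[len - 1] << 8;
 *		}
 *		while (acc >> 32)
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (u32)acc;
 *	}
 *
 * Starting from 0xffffffff (which is -0 in 1's complement) leaves the
 * folded checksum unchanged but keeps a successful return from ever
 * being 0, so 0 can unambiguously signal a fault.
 */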
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10

#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, fault);	\
	EX_TABLE(8 ## n ## 1b, fault);	\
	EX_TABLE(8 ## n ## 2b, fault);	\
	EX_TABLE(8 ## n ## 3b, fault);	\
	EX_TABLE(8 ## n ## 4b, fault);	\
	EX_TABLE(8 ## n ## 5b, fault);	\
	EX_TABLE(8 ## n ## 6b, fault);	\
	EX_TABLE(8 ## n ## 7b, fault);
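/*
 * For n = 0, CSUM_COPY_16_BYTES_WITHEX emits local labels 800: to 807:
 * on its loads and stores, and CSUM_COPY_16_BYTES_EXCODE emits one
 * matching EX_TABLE(label, fault) entry per label, so a fault on any
 * of those accesses branches to "fault" below and returns 0.
 */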

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
	li	r12,-1
	addic	r0,r0,0			/* clear carry */
	addi	r6,r4,-4
	neg	r0,r4
	addi	r4,r3,-4
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
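	/* Byte-swapping a 16-bit 1's complement sum is equivalent to
	   rotating it by 8 bits, so for an odd destination address the
	   accumulator is pre-rotated here and rotated back by the
	   rlwinm just before the final blr. */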
	rlwinm	r7,r6,3,0x8
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even ? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
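	/* dcbz zero-fills the destination line in the cache, so the
	   copy never has to fetch the old destination data from memory */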
54:	dcbz	r11,r6
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8
	adde	r12,r12,r0
66:	addze	r3,r12
	beqlr+	cr7
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

fault:
	li	r3,0
	blr

	EX_TABLE(70b, fault);
	EX_TABLE(71b, fault);
	EX_TABLE(72b, fault);
	EX_TABLE(73b, fault);
	EX_TABLE(54b, fault);

/*
 * this stuff attaches exception table entries for the loads and stores
 * in the cacheline loop above; a fault in either the read or the write
 * part branches to "fault", which returns 0
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, fault);
	EX_TABLE(31b, fault);
	EX_TABLE(40b, fault);
	EX_TABLE(41b, fault);
	EX_TABLE(50b, fault);
	EX_TABLE(51b, fault);

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 */

_GLOBAL(csum_ipv6_magic)
	lwz	r8, 0(r3)
	lwz	r9, 4(r3)
	addc	r0, r7, r8
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0
	rotlwi	r3, r0, 16
	add	r3, r0, r3
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31
	blr
EXPORT_SYMBOL(csum_ipv6_magic)
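
/*
 * The four instructions before the blr fold the 32-bit sum in r0 down
 * to the complemented 16-bit checksum returned in r3. A hedged C
 * sketch of that fold (illustration only, kernel u16/u32 types
 * assumed):
 *
 *	u16 fold_sketch(u32 sum)
 *	{
 *		u32 t = ((sum << 16) | (sum >> 16)) + sum;  // rotlwi + add
 *		return (u16)(~t >> 16);                     // not + rlwinm
 *	}
 */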