1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
4 | * operating system. INET is implemented using the BSD Socket |
5 | * interface as the means of communication with the user level. |
6 | * |
7 | * IP/TCP/UDP checksumming routines |
8 | * |
9 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> |
10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> |
11 | * Tom May, <ftom@netcom.com> |
12 | * Pentium Pro/II routines: |
13 | * Alexander Kjeldaas <astor@guardian.no> |
14 | * Finn Arne Gangstad <finnag@guardian.no> |
15 | * Lots of code moved from tcp.c and ip.c; see those files |
16 | * for more names. |
17 | * |
18 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception |
19 | * handling. |
20 | * Andi Kleen, add zeroing on error |
21 | * converted to pure assembler |
22 | */ |
23 | |
24 | #include <linux/export.h> |
25 | #include <linux/linkage.h> |
26 | #include <asm/errno.h> |
27 | #include <asm/asm.h> |
28 | #include <asm/nospec-branch.h> |
29 | |
30 | /* |
31 | * computes a partial checksum, e.g. for TCP/UDP fragments |
32 | */ |
33 | |
34 | /* |
35 | unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) |
36 | */ |
37 | |
38 | .text |
39 | |
40 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM |
41 | |
42 | /* |
43 | * Experiments with Ethernet and SLIP connections show that buff |
44 | * is aligned on either a 2-byte or 4-byte boundary. We get at |
45 | * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. |
46 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte |
47 | * alignment for the unrolled loop. |
48 | */ |
/*
 * csum_partial for 486/Pentium-class CPUs (!CONFIG_X86_USE_PPRO_CHECKSUM).
 *
 * Stack arguments (offsets valid after the two pushes below):
 *	12(%esp) buff, 16(%esp) len, 20(%esp) sum
 * Returns the updated 32-bit partial sum in %eax.
 * %esi and %ebx are saved and restored; %ecx and %edx are scratch.
 *
 * An odd buff is handled by adding the first byte, rotating the sum by
 * 8 bits, summing the rest from the now even address, and rotating by
 * 8 bits once more before returning.
 */
SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2 bytes.

	# buf is odd
	dec %ecx
	jl 8f			# No byte available: return sum unchanged.
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax		# Fold the carry now; roll overwrites CF.
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx		# Keep the byte count for the tail code.
	shrl $5, %ecx		# ecx = number of 32-byte chunks.
	jz 2f
	testl %esi, %esi	# Clears CF before the adc chain.
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi	# lea and dec leave CF alone, so the
	dec %ecx		# carry survives into the next pass.
	jne 1b
	adcl $0, %eax		# Fold the last carry of the chain.
2:	movl %edx, %ecx
	andl $0x1c, %edx	# Bytes left in whole dwords (0..28).
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx		# 0..3 trailing bytes.
	jz 7f
	cmpl $2, %ecx
	jb 5f			# Exactly one byte left.
	movw (%esi),%cx		# mov and lea keep the cmpl flags for je.
	leal 2(%esi),%esi
	je 6f			# Exactly two bytes left.
	shll $16,%ecx		# Three bytes: word goes high, byte low.
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testb $1, 12(%esp)	# Was the original buff odd?
	jz 8f
	roll $8, %eax		# Second 8-bit rotate for the odd case.
8:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)
132 | |
133 | #else |
134 | |
135 | /* Version for PentiumII/PPro */ |
136 | |
/*
 * csum_partial, Pentium II / PPro flavour.
 *
 * Stack arguments (offsets valid after the two pushes below):
 *   12(%esp) buff, 16(%esp) len, 20(%esp) sum
 * The running sum is kept in %eax and returned there.
 * %esi and %ebx are saved on entry and restored before RET; %ecx and
 * %edx are used as scratch.
 */
137 | SYM_FUNC_START(csum_partial) |
138 | pushl %esi |
139 | pushl %ebx |
140 | movl 20(%esp),%eax # Function arg: unsigned int sum |
141 | movl 16(%esp),%ecx # Function arg: int len |
142 | movl 12(%esp),%esi # Function arg: const unsigned char *buf |
143 | |
/* Anything not 4-byte aligned is fixed up at 25 before coming back to 10. */
144 | testl $3, %esi |
145 | jnz 25f |
/*
 * Aligned main path.
 *   edx = len, kept for the 1-3 byte tail
 *   ebx = len & 0x7c, the bytes summed on the first, partial pass
 *   ecx = len >> 7, the number of further full 128-byte passes
 * esi is advanced past the partial chunk so the negative offsets in the
 * table at 40 address it.  ebx then becomes the entry address
 * 45f - 3 * (ebx / 4); the scaling by 3 assumes every add/adc in that
 * table assembles to 3 bytes.  testl clears CF before the adc chain is
 * entered part-way through.
 */
146 | 10: |
147 | movl %ecx, %edx |
148 | movl %ecx, %ebx |
149 | andl $0x7c, %ebx |
150 | shrl $7, %ecx |
151 | addl %ebx,%esi |
152 | shrl $2, %ebx |
153 | negl %ebx |
154 | lea 45f(%ebx,%ebx,2), %ebx |
155 | testl %esi, %esi |
156 | JMP_NOSPEC ebx |
157 | |
/* Reached from 30 with ecx already reduced by 2: sum one word, go to 10. */
158 | # Handle 2-byte-aligned regions |
159 | 20: addw (%esi), %ax |
160 | lea 2(%esi), %esi |
161 | adcl $0, %eax |
162 | jmp 10b |
163 | 25: |
164 | testl $1, %esi |
165 | jz 30f |
/*
 * Odd start address: add the first byte, fold the carry, then rotate the
 * sum by 8 bits.  The matching rotate is at label 80.  With no byte
 * available (dec makes ecx negative) the sum is returned untouched.
 */
166 | # buf is odd |
167 | dec %ecx |
168 | jl 90f |
169 | movzbl (%esi), %ebx |
170 | addl %ebx, %eax |
171 | adcl $0, %eax |
172 | roll $8, %eax |
173 | inc %esi |
174 | testl $2, %esi |
175 | jz 10b |
176 | |
/*
 * esi is 2-byte but not 4-byte aligned here.
 *   more than 2 bytes left -> 20 (take a word, continue aligned)
 *   exactly 2 bytes left   -> 32
 *   0 or 1 byte left       -> restore ecx and handle it inline
 */
177 | 30: subl $2, %ecx |
178 | ja 20b |
179 | je 32f |
180 | addl $2, %ecx |
181 | jz 80f |
182 | movzbl (%esi),%ebx # csumming 1 byte, 2-aligned |
183 | addl %ebx, %eax |
184 | adcl $0, %eax |
185 | jmp 80f |
186 | 32: |
187 | addw (%esi), %ax # csumming 2 bytes, 2-aligned |
188 | adcl $0, %eax |
189 | jmp 80f |
190 | |
/*
 * Unrolled sum of 32 dwords ending at esi.  A full pass starts at 40
 * with addl, which ignores the incoming CF; the computed jump above
 * enters somewhere inside the adcl run with CF already cleared.
 */
191 | 40: |
192 | addl -128(%esi), %eax |
193 | adcl -124(%esi), %eax |
194 | adcl -120(%esi), %eax |
195 | adcl -116(%esi), %eax |
196 | adcl -112(%esi), %eax |
197 | adcl -108(%esi), %eax |
198 | adcl -104(%esi), %eax |
199 | adcl -100(%esi), %eax |
200 | adcl -96(%esi), %eax |
201 | adcl -92(%esi), %eax |
202 | adcl -88(%esi), %eax |
203 | adcl -84(%esi), %eax |
204 | adcl -80(%esi), %eax |
205 | adcl -76(%esi), %eax |
206 | adcl -72(%esi), %eax |
207 | adcl -68(%esi), %eax |
208 | adcl -64(%esi), %eax |
209 | adcl -60(%esi), %eax |
210 | adcl -56(%esi), %eax |
211 | adcl -52(%esi), %eax |
212 | adcl -48(%esi), %eax |
213 | adcl -44(%esi), %eax |
214 | adcl -40(%esi), %eax |
215 | adcl -36(%esi), %eax |
216 | adcl -32(%esi), %eax |
217 | adcl -28(%esi), %eax |
218 | adcl -24(%esi), %eax |
219 | adcl -20(%esi), %eax |
220 | adcl -16(%esi), %eax |
221 | adcl -12(%esi), %eax |
222 | adcl -8(%esi), %eax |
223 | adcl -4(%esi), %eax |
/*
 * End of a pass: lea moves esi on by 128 without touching CF, adcl $0
 * folds the carry of the chain, and the loop repeats while ecx, after
 * the decrement, is still >= 0.  Afterwards ecx gets len back for the
 * tail, and -128(%esi) addresses the first unsummed byte.
 */
224 | 45: |
225 | lea 128(%esi), %esi |
226 | adcl $0, %eax |
227 | dec %ecx |
228 | jge 40b |
229 | movl %edx, %ecx |
230 | 50: andl $3, %ecx |
231 | jz 80f |
232 | |
/*
 * Build a mask of 1, 2 or 3 low bytes (0xffffff shifted right by 16, 8
 * or 0 bits) and apply it to the whole dword holding the tail bytes.
 */
233 | # Handle the last 1-3 bytes without jumping |
234 | notl %ecx # 1->2, 2->1, 3->0, higher bits are masked |
235 | movl $0xffffff,%ebx # by the shll and shrl instructions |
236 | shll $3,%ecx |
237 | shrl %cl,%ebx |
238 | andl -128(%esi),%ebx # esi is 4-aligned so should be ok |
239 | addl %ebx,%eax |
240 | adcl $0,%eax |
/* If the buff argument still on the stack is odd, rotate by 8 once more. */
241 | 80: |
242 | testb $1, 12(%esp) |
243 | jz 90f |
244 | roll $8, %eax |
245 | 90: |
246 | popl %ebx |
247 | popl %esi |
248 | RET |
249 | SYM_FUNC_END(csum_partial) |
250 | |
251 | #endif |
252 | EXPORT_SYMBOL(csum_partial) |
253 | |
254 | /* |
255 | unsigned int csum_partial_copy_generic (const char *src, char *dst, |
256 | int len) |
257 | */ |
258 | |
259 | /* |
260 | * Copy from ds while checksumming, otherwise like csum_partial |
261 | */ |
262 | |
/*
 * EXC(insn): emit insn under the local label 9999 and register an
 * exception table entry for it through _ASM_EXTABLE_TYPE.  The fixup
 * target is the nearest following local label 7, and the entry type is
 * EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX.
 * NOTE(review): _ASM_EXTABLE_TYPE and the EX_* constants are defined
 * outside this file; the flag name suggests %eax is zeroed when the
 * wrapped instruction faults -- confirm against the extable headers.
 */
263 | #define EXC(y...) \ |
264 | 9999: y; \ |
265 | _ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX) |
266 | |
267 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM |
268 | |
/*
 * csum_partial_copy_generic for 486/Pentium-class CPUs
 * (!CONFIG_X86_USE_PPRO_CHECKSUM): copy len bytes from src to dst and
 * return their 32-bit partial sum in %eax, starting from a sum of -1.
 *
 * ARGBASE is the number of bytes the prologue puts below the return
 * address (one 4-byte scratch slot plus three pushes), so the arguments
 * are at ARGBASE+4 (src), ARGBASE+8 (dst) and ARGBASE+12 (len).
 * FP is the offset of the scratch slot, used to park the byte count.
 *
 * %edi, %esi and %ebx are saved and restored; %ecx and %edx are scratch.
 * Every memory access to src or dst is wrapped in EXC(), whose fixup
 * target is the local label 7 just before the epilogue.
 */
269 | #define ARGBASE 16 |
270 | #define FP 12 |
271 | |
272 | SYM_FUNC_START(csum_partial_copy_generic) |
273 | subl $4,%esp |
274 | pushl %edi |
275 | pushl %esi |
276 | pushl %ebx |
277 | movl ARGBASE+12(%esp),%ecx # len |
278 | movl ARGBASE+4(%esp),%esi # src |
279 | movl ARGBASE+8(%esp),%edi # dst |
280 | |
/*
 * Only bit 1 of the destination address is tested.  If it is set, one
 * 16-bit word is copied and summed first, provided at least two bytes
 * are available; otherwise the short tail code at 4 does all the work.
 */
281 | movl $-1, %eax # sum |
282 | testl $2, %edi # Check alignment. |
283 | jz 2f # Jump if alignment is ok. |
284 | subl $2, %ecx # Alignment uses up two bytes. |
285 | jae 1f # Jump if we had at least two bytes. |
286 | addl $2, %ecx # ecx was < 2. Deal with it. |
287 | jmp 4f |
288 | EXC(1: movw (%esi), %bx ) |
289 | addl $2, %esi |
290 | EXC( movw %bx, (%edi) ) |
291 | addl $2, %edi |
292 | addw %bx, %ax |
293 | adcl $0, %eax |
/*
 * Main loop: 32 bytes per pass, ecx = number of passes.  The remaining
 * byte count is parked in the scratch slot because %edx carries data
 * inside the loop.  testl clears CF before the adc chain; the mov, lea
 * and dec instructions between the adcl's leave CF alone, so the carry
 * runs through the whole loop and is folded once after it.
 */
294 | 2: |
295 | movl %ecx, FP(%esp) |
296 | shrl $5, %ecx |
297 | jz 2f |
298 | testl %esi, %esi # what's wrong with clc? |
299 | EXC(1: movl (%esi), %ebx ) |
300 | EXC( movl 4(%esi), %edx ) |
301 | adcl %ebx, %eax |
302 | EXC( movl %ebx, (%edi) ) |
303 | adcl %edx, %eax |
304 | EXC( movl %edx, 4(%edi) ) |
305 | |
306 | EXC( movl 8(%esi), %ebx ) |
307 | EXC( movl 12(%esi), %edx ) |
308 | adcl %ebx, %eax |
309 | EXC( movl %ebx, 8(%edi) ) |
310 | adcl %edx, %eax |
311 | EXC( movl %edx, 12(%edi) ) |
312 | |
313 | EXC( movl 16(%esi), %ebx ) |
314 | EXC( movl 20(%esi), %edx ) |
315 | adcl %ebx, %eax |
316 | EXC( movl %ebx, 16(%edi) ) |
317 | adcl %edx, %eax |
318 | EXC( movl %edx, 20(%edi) ) |
319 | |
320 | EXC( movl 24(%esi), %ebx ) |
321 | EXC( movl 28(%esi), %edx ) |
322 | adcl %ebx, %eax |
323 | EXC( movl %ebx, 24(%edi) ) |
324 | adcl %edx, %eax |
325 | EXC( movl %edx, 28(%edi) ) |
326 | |
327 | lea 32(%esi), %esi |
328 | lea 32(%edi), %edi |
329 | dec %ecx |
330 | jne 1b |
331 | adcl $0, %eax |
/* Whole dwords left over from the 32-byte loop: (count & 0x1c) / 4 of them. */
332 | 2: movl FP(%esp), %edx |
333 | movl %edx, %ecx |
334 | andl $0x1c, %edx |
335 | je 4f |
336 | shrl $2, %edx # This clears CF |
337 | EXC(3: movl (%esi), %ebx ) |
338 | adcl %ebx, %eax |
339 | EXC( movl %ebx, (%edi) ) |
340 | lea 4(%esi), %esi |
341 | lea 4(%edi), %edi |
342 | dec %edx |
343 | jne 3b |
344 | adcl $0, %eax |
/*
 * Trailing 0-3 bytes.  The flags set by cmpl survive the mov and lea
 * instructions, so je still tests for exactly two bytes.  With three
 * bytes the word is shifted into the upper half of ecx and the last
 * byte is loaded into cl before the combined value is added.
 */
345 | 4: andl $3, %ecx |
346 | jz 7f |
347 | cmpl $2, %ecx |
348 | jb 5f |
349 | EXC( movw (%esi), %cx ) |
350 | leal 2(%esi), %esi |
351 | EXC( movw %cx, (%edi) ) |
352 | leal 2(%edi), %edi |
353 | je 6f |
354 | shll $16,%ecx |
355 | EXC(5: movb (%esi), %cl ) |
356 | EXC( movb %cl, (%edi) ) |
357 | 6: addl %ecx, %eax |
358 | adcl $0, %eax |
/* Common exit; also the fixup target named by every EXC() above. */
359 | 7: |
360 | |
361 | popl %ebx |
362 | popl %esi |
363 | popl %edi |
364 | popl %ecx # equivalent to addl $4,%esp |
365 | RET |
366 | SYM_FUNC_END(csum_partial_copy_generic) |
367 | |
368 | #else |
369 | |
370 | /* Version for PentiumII/PPro */ |
371 | |
/*
 * ROUND1(x) / ROUND(x): copy the dword at x(%esi) to x(%edi) through
 * %ebx and add it to the running sum in %eax.  Both loads and stores are
 * wrapped in EXC().  ROUND1 uses addl, so it ignores the incoming carry
 * flag and starts a fresh carry chain; ROUND uses adcl and continues it.
 */
372 | #define ROUND1(x) \ |
373 | EXC(movl x(%esi), %ebx ) ; \ |
374 | addl %ebx, %eax ; \ |
375 | EXC(movl %ebx, x(%edi) ) ; |
376 | |
377 | #define ROUND(x) \ |
378 | EXC(movl x(%esi), %ebx ) ; \ |
379 | adcl %ebx, %eax ; \ |
380 | EXC(movl %ebx, x(%edi) ) ; |
381 | |
/*
 * csum_partial_copy_generic, Pentium II / PPro flavour: copy len bytes
 * from src to dst and return their 32-bit partial sum in %eax, starting
 * from a sum of -1.
 *
 * ARGBASE is the number of bytes pushed below the return address (three
 * registers), so the arguments are at ARGBASE+4 (src), ARGBASE+8 (dst)
 * and ARGBASE+12 (len).  %ebx, %edi and %esi are saved and restored;
 * %ecx and %edx are scratch.  Every access to src or dst goes through
 * EXC(), whose fixup target is the local label 7 before the epilogue.
 */
382 | #define ARGBASE 12 |
383 | |
384 | SYM_FUNC_START(csum_partial_copy_generic) |
385 | pushl %ebx |
386 | pushl %edi |
387 | pushl %esi |
388 | movl ARGBASE+4(%esp),%esi #src |
389 | movl ARGBASE+8(%esp),%edi #dst |
390 | movl ARGBASE+12(%esp),%ecx #len |
391 | movl $-1, %eax #sum |
/*
 * Set up the first, partial pass through the unrolled block:
 *   ecx = len >> 6, the number of further full 64-byte passes
 *   ebx = -(len & 0x3c); subtracting it advances esi and edi past the
 *         partial chunk so the negative offsets in ROUND address it
 *   edx = (esi - 1) & -32, used only by the two byte loads at label 1
 * The entry address is 3f + 2 * ebx, i.e. 3f - 8 * (dwords in the
 * partial chunk); this assumes each ROUND expands to 8 bytes of code.
 * testl clears CF before the adc chain is entered part-way through.
 * NOTE(review): the value written to edx by "movl %esi, %edx" is
 * overwritten by the lea below before it is read.
 */
392 | # movl %ecx, %edx |
393 | movl %ecx, %ebx |
394 | movl %esi, %edx |
395 | shrl $6, %ecx |
396 | andl $0x3c, %ebx |
397 | negl %ebx |
398 | subl %ebx, %esi |
399 | subl %ebx, %edi |
400 | lea -1(%esi),%edx |
401 | andl $-32,%edx |
402 | lea 3f(%ebx,%ebx), %ebx |
403 | testl %esi, %esi |
404 | JMP_NOSPEC ebx |
/*
 * Full 64-byte pass.  The two byte loads into %bl are discarded, since
 * ROUND1 reloads %ebx straight away.
 * NOTE(review): presumably they touch upcoming source memory ahead of
 * the copy -- confirm the intent.
 */
405 | 1: addl $64,%esi |
406 | addl $64,%edi |
407 | EXC(movb -32(%edx),%bl) ; EXC(movb (%edx),%bl) |
408 | ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) |
409 | ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) |
410 | ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) |
411 | ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) |
/* Fold the carry of the chain; repeat while ecx, once decremented, is >= 0. */
412 | 3: adcl $0,%eax |
413 | addl $64, %edx |
414 | dec %ecx |
415 | jge 1b |
/*
 * Trailing 0-3 bytes, with len reloaded from the stack.  The flags set
 * by cmpl survive the mov and lea instructions, so je still tests for
 * exactly two bytes.  With three bytes the word is shifted into the
 * upper half of edx and the last byte is loaded into dl.
 */
416 | 4: movl ARGBASE+12(%esp),%edx #len |
417 | andl $3, %edx |
418 | jz 7f |
419 | cmpl $2, %edx |
420 | jb 5f |
421 | EXC( movw (%esi), %dx ) |
422 | leal 2(%esi), %esi |
423 | EXC( movw %dx, (%edi) ) |
424 | leal 2(%edi), %edi |
425 | je 6f |
426 | shll $16,%edx |
427 | 5: |
428 | EXC( movb (%esi), %dl ) |
429 | EXC( movb %dl, (%edi) ) |
430 | 6: addl %edx, %eax |
431 | adcl $0, %eax |
/* Common exit; also the fixup target named by every EXC() above. */
432 | 7: |
433 | |
434 | popl %esi |
435 | popl %edi |
436 | popl %ebx |
437 | RET |
438 | SYM_FUNC_END(csum_partial_copy_generic) |
439 | |
440 | #undef ROUND |
441 | #undef ROUND1 |
442 | |
443 | #endif |
444 | EXPORT_SYMBOL(csum_partial_copy_generic) |
445 | |