/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On a fault while reading the source or writing the destination the
 * function returns 0 (see .Lfault below); the wrappers are expected to
 * handle that case.
 *
 * Input
 * rdi	source
 * rsi	destination
 * edx	len (32bit)
 *
 * Output
 * eax	32-bit folded sum; 0 only in case of a fault (the all-ones seed
 *	guarantees a successful sum never folds to 0)
 *
 * The destination is aligned to 8 bytes internally (.Lunaligned); the
 * source may be accessed unaligned.
 */
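/*
 * Note: the register interface above corresponds to a three-argument C
 * call; the declaration assumed here (see the arch checksum header for
 * the authoritative prototype) looks roughly like:
 *
 *	__wsum csum_partial_copy_generic(const void *src, void *dst, int len);
 */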
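/*
 * The source/dest macros plant a numeric local label directly on the
 * following memory access and register it in the exception table, so a
 * fault in that access jumps to .Lfault below.
 */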
.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
.endm

.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
.endm

SYM_FUNC_START(csum_partial_copy_generic)
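	/* save the callee-saved registers used as scratch below; restored at .Lout */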
	subq $5*8, %rsp
	movq %rbx, 0*8(%rsp)
	movq %r12, 1*8(%rsp)
	movq %r14, 2*8(%rsp)
	movq %r13, 3*8(%rsp)
	movq %r15, 4*8(%rsp)

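	/*
	 * Seed the 64-bit accumulator with all ones so a successful run
	 * never folds down to 0; the fault path returns 0, which lets the
	 * callers treat a 0 result as the error indication.  %r9 stays 0
	 * throughout and is only used to fold pending carries in via adc.
	 */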
	movl $-1, %eax
	xorl %r9d, %r9d
	movl %edx, %ecx
	cmpl $8, %ecx
	jb .Lshort

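	/* the 64-byte main loop wants an 8-byte-aligned destination; fix up any misalignment first */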
	testb $7, %sil
	jne .Lunaligned
.Laligned:
	movl %ecx, %r12d

	shrq $6, %r12
	jz .Lhandle_tail	/* < 64 */

	clc
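	/* CF is now clear; the adcq chain below threads the running carry through each 64-byte block */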

	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12: loopcnt */
	/* r10: temp5, r15: temp6, r14: temp7, r13: temp8 */
	.p2align 4
.Lloop:
	source
	movq (%rdi), %rbx
	source
	movq 8(%rdi), %r8
	source
	movq 16(%rdi), %r11
	source
	movq 24(%rdi), %rdx

	source
	movq 32(%rdi), %r10
	source
	movq 40(%rdi), %r15
	source
	movq 48(%rdi), %r14
	source
	movq 56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	adcq %rbx, %rax
	adcq %r8, %rax
	adcq %r11, %rax
	adcq %rdx, %rax
	adcq %r10, %rax
	adcq %r15, %rax
	adcq %r14, %rax
	adcq %r13, %rax

	decl %r12d
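	/*
	 * decl does not change CF, so the carry produced by the adcq chain
	 * above survives across the stores and the jnz, and is folded in by
	 * the adcq %r9, %rax after the loop.
	 */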

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz .Lloop

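	/* fold in the carry left over from the last loop iteration (%r9 is 0) */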
	adcq %r9, %rax

	/* do last up to 56 bytes */
.Lhandle_tail:
	/*
	 * ecx: remaining byte count; bit 63 of rcx: set when the copy
	 * started at an odd destination address, i.e. the end result
	 * needs a rol $8 (see .Lwas_odd)
	 */
	movq %rcx, %r10
	andl $63, %ecx
	shrl $3, %ecx
	jz .Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi	/* preserve carry */
	leaq 8(%rdi), %rdi
	jnz .Lloop_8
	adcq %r9, %rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax
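	/*
	 * Illustrative C sketch of the fold above (not part of the kernel
	 * API; the function name is made up): add the two 32-bit halves of
	 * the 64-bit accumulator and wrap the carry back in.
	 *
	 *	static inline u32 csum_fold_64_to_32(u64 sum)
	 *	{
	 *		u32 lo = (u32)sum, hi = (u32)(sum >> 32);
	 *		u32 res = lo + hi;
	 *
	 *		res += (res < lo);	// wrap the carry back in
	 *		return res;
	 *	}
	 */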

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
.L1:	/* .Lshort rejoins the common path here */
	shrl $1, %ecx
	jz .Lhandle_1
	movl $2, %edx
	xorl %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz .Lende
	xorl %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

.Lende:
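	/* bit 63 of %r10 is set in .Lodd when the start address was odd; rotate the sum if so */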
	testq %r10, %r10
	js .Lwas_odd
.Lout:
	movq 0*8(%rsp), %rbx
	movq 1*8(%rsp), %r12
	movq 2*8(%rsp), %r14
	movq 3*8(%rsp), %r13
	movq 4*8(%rsp), %r15
	addq $5*8, %rsp
	RET
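	/* less than 8 bytes total: skip the 8-byte machinery and reuse the word/byte tail path */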
.Lshort:
	movl %ecx, %r10d
	jmp .L1
.Lunaligned:
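	/*
	 * Copy 1, 2 and/or 4 bytes until the destination is 8-byte aligned,
	 * folding them into the sum with plain addq: the accumulator is
	 * still far below 2^64 here, so no carries can be lost.
	 */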
	xorl %ebx, %ebx
	testb $1, %sil
	jne .Lodd
1:	testb $2, %sil
	je 2f
	source
	movw (%rdi), %bx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	subq $2, %rcx
	leaq 2(%rsi), %rsi
	addq %rbx, %rax
2:	testb $4, %sil
	je .Laligned
	source
	movl (%rdi), %ebx
	dest
	movl %ebx, (%rsi)
	leaq 4(%rdi), %rdi
	subq $4, %rcx
	leaq 4(%rsi), %rsi
	addq %rbx, %rax
	jmp .Laligned

.Lodd:
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	leaq 1(%rdi), %rdi
	leaq 1(%rsi), %rsi
	/* decrement, set MSB */
	leaq -1(%rcx, %rcx), %rcx
	rorq $1, %rcx
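	/*
	 * The leading odd byte is accounted for in bits 15:8 of the sum;
	 * the closing rol $8 in .Lwas_odd puts the byte lanes back in place.
	 */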
	shll $8, %ebx
	addq %rbx, %rax
	jmp 1b

.Lwas_odd:
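	/* the copy started at an odd address: rotate the folded sum by one byte to restore the lane order */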
	roll $8, %eax
	jmp .Lout

	/* Exception: just return 0 */
.Lfault:
	xorl %eax, %eax
	jmp .Lout
SYM_FUNC_END(csum_partial_copy_generic)