1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * include/asm-generic/xor.h |
4 | * |
5 | * Generic optimized RAID-5 checksumming functions. |
6 | */ |
7 | |
8 | #include <linux/prefetch.h> |
9 | |
/*
 * xor_8regs_2 - XOR the buffer at @p2 into the buffer at @p1, in place.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 *
 * The buffers are walked in "lines" of eight longs.  The exit test sits at
 * the bottom of the loop, so one line is always processed, even when @bytes
 * covers less than a full line.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2)
{
	long todo = bytes / sizeof(long) / 8;

	for (;;) {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];

		p1 += 8;
		p2 += 8;

		if (--todo <= 0)
			break;
	}
}
29 | |
/*
 * xor_8regs_3 - XOR the buffers at @p2 and @p3 into @p1, in place.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 *
 * Works on "lines" of eight longs.  The exit test is at the bottom of the
 * loop, so at least one line is always processed.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3)
{
	long todo = bytes / sizeof(long) / 8;

	for (;;) {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;

		if (--todo <= 0)
			break;
	}
}
51 | |
/*
 * xor_8regs_4 - XOR the buffers at @p2, @p3 and @p4 into @p1, in place.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 *
 * Works on "lines" of eight longs.  The exit test is at the bottom of the
 * loop, so at least one line is always processed.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3,
	    const unsigned long * __restrict p4)
{
	long todo = bytes / sizeof(long) / 8;

	for (;;) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;

		if (--todo <= 0)
			break;
	}
}
75 | |
/*
 * xor_8regs_5 - XOR the buffers at @p2..@p5 into @p1, in place.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 * @p5:    fifth operand (only read)
 *
 * Works on "lines" of eight longs.  The exit test is at the bottom of the
 * loop, so at least one line is always processed.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3,
	    const unsigned long * __restrict p4,
	    const unsigned long * __restrict p5)
{
	long todo = bytes / sizeof(long) / 8;

	for (;;) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;

		if (--todo <= 0)
			break;
	}
}
101 | |
/*
 * xor_32regs_2 - XOR the buffer at @p2 into the buffer at @p1, in place.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 *
 * Each pass copies one "line" of eight longs from @p1 into locals, folds
 * the matching line of @p2 into them, then writes the locals back.  Loads
 * and stores are kept in separate groups so they can be issued as bursts
 * where the target allows it.  The exit test is at the bottom of the loop,
 * so at least one line is always processed.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2)
{
	long todo = bytes / sizeof(long) / 8;

	for (;;) {
		/* Load one destination line. */
		register long r0 = p1[0];
		register long r1 = p1[1];
		register long r2 = p1[2];
		register long r3 = p1[3];
		register long r4 = p1[4];
		register long r5 = p1[5];
		register long r6 = p1[6];
		register long r7 = p1[7];

		/* Fold in the source line. */
		r0 ^= p2[0];
		r1 ^= p2[1];
		r2 ^= p2[2];
		r3 ^= p2[3];
		r4 ^= p2[4];
		r5 ^= p2[5];
		r6 ^= p2[6];
		r7 ^= p2[7];

		/* Write the result back. */
		p1[0] = r0;
		p1[1] = r1;
		p1[2] = r2;
		p1[3] = r3;
		p1[4] = r4;
		p1[5] = r5;
		p1[6] = r6;
		p1[7] = r7;

		p1 += 8;
		p2 += 8;

		if (--todo <= 0)
			break;
	}
}
138 | |
/*
 * xor_32regs_3 - XOR the buffers at @p2 and @p3 into @p1, in place.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 *
 * Each pass copies one "line" of eight longs from @p1 into locals, folds
 * the matching line of every source into them, then writes the locals
 * back.  The exit test is at the bottom of the loop, so at least one line
 * is always processed.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3)
{
	long todo = bytes / sizeof(long) / 8;

	for (;;) {
		/* Load one destination line. */
		register long r0 = p1[0];
		register long r1 = p1[1];
		register long r2 = p1[2];
		register long r3 = p1[3];
		register long r4 = p1[4];
		register long r5 = p1[5];
		register long r6 = p1[6];
		register long r7 = p1[7];

		/* Fold in the first source line. */
		r0 ^= p2[0];
		r1 ^= p2[1];
		r2 ^= p2[2];
		r3 ^= p2[3];
		r4 ^= p2[4];
		r5 ^= p2[5];
		r6 ^= p2[6];
		r7 ^= p2[7];

		/* Fold in the second source line. */
		r0 ^= p3[0];
		r1 ^= p3[1];
		r2 ^= p3[2];
		r3 ^= p3[3];
		r4 ^= p3[4];
		r5 ^= p3[5];
		r6 ^= p3[6];
		r7 ^= p3[7];

		/* Write the result back. */
		p1[0] = r0;
		p1[1] = r1;
		p1[2] = r2;
		p1[3] = r3;
		p1[4] = r4;
		p1[5] = r5;
		p1[6] = r6;
		p1[7] = r7;

		p1 += 8;
		p2 += 8;
		p3 += 8;

		if (--todo <= 0)
			break;
	}
}
185 | |
/*
 * xor_32regs_4 - XOR the buffers at @p2, @p3 and @p4 into @p1, in place.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 *
 * Each pass copies one "line" of eight longs from @p1 into locals, folds
 * the matching line of every source into them, then writes the locals
 * back.  The exit test is at the bottom of the loop, so at least one line
 * is always processed.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3,
	     const unsigned long * __restrict p4)
{
	long todo = bytes / sizeof(long) / 8;

	for (;;) {
		/* Load one destination line. */
		register long r0 = p1[0];
		register long r1 = p1[1];
		register long r2 = p1[2];
		register long r3 = p1[3];
		register long r4 = p1[4];
		register long r5 = p1[5];
		register long r6 = p1[6];
		register long r7 = p1[7];

		/* Fold in the first source line. */
		r0 ^= p2[0];
		r1 ^= p2[1];
		r2 ^= p2[2];
		r3 ^= p2[3];
		r4 ^= p2[4];
		r5 ^= p2[5];
		r6 ^= p2[6];
		r7 ^= p2[7];

		/* Fold in the second source line. */
		r0 ^= p3[0];
		r1 ^= p3[1];
		r2 ^= p3[2];
		r3 ^= p3[3];
		r4 ^= p3[4];
		r5 ^= p3[5];
		r6 ^= p3[6];
		r7 ^= p3[7];

		/* Fold in the third source line. */
		r0 ^= p4[0];
		r1 ^= p4[1];
		r2 ^= p4[2];
		r3 ^= p4[3];
		r4 ^= p4[4];
		r5 ^= p4[5];
		r6 ^= p4[6];
		r7 ^= p4[7];

		/* Write the result back. */
		p1[0] = r0;
		p1[1] = r1;
		p1[2] = r2;
		p1[3] = r3;
		p1[4] = r4;
		p1[5] = r5;
		p1[6] = r6;
		p1[7] = r7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;

		if (--todo <= 0)
			break;
	}
}
242 | |
/*
 * xor_32regs_5 - XOR the buffers at @p2..@p5 into @p1, in place.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 * @p5:    fifth operand (only read)
 *
 * Each pass copies one "line" of eight longs from @p1 into locals, folds
 * the matching line of every source into them, then writes the locals
 * back.  The exit test is at the bottom of the loop, so at least one line
 * is always processed.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3,
	     const unsigned long * __restrict p4,
	     const unsigned long * __restrict p5)
{
	long todo = bytes / sizeof(long) / 8;

	for (;;) {
		/* Load one destination line. */
		register long r0 = p1[0];
		register long r1 = p1[1];
		register long r2 = p1[2];
		register long r3 = p1[3];
		register long r4 = p1[4];
		register long r5 = p1[5];
		register long r6 = p1[6];
		register long r7 = p1[7];

		/* Fold in the first source line. */
		r0 ^= p2[0];
		r1 ^= p2[1];
		r2 ^= p2[2];
		r3 ^= p2[3];
		r4 ^= p2[4];
		r5 ^= p2[5];
		r6 ^= p2[6];
		r7 ^= p2[7];

		/* Fold in the second source line. */
		r0 ^= p3[0];
		r1 ^= p3[1];
		r2 ^= p3[2];
		r3 ^= p3[3];
		r4 ^= p3[4];
		r5 ^= p3[5];
		r6 ^= p3[6];
		r7 ^= p3[7];

		/* Fold in the third source line. */
		r0 ^= p4[0];
		r1 ^= p4[1];
		r2 ^= p4[2];
		r3 ^= p4[3];
		r4 ^= p4[4];
		r5 ^= p4[5];
		r6 ^= p4[6];
		r7 ^= p4[7];

		/* Fold in the fourth source line. */
		r0 ^= p5[0];
		r1 ^= p5[1];
		r2 ^= p5[2];
		r3 ^= p5[3];
		r4 ^= p5[4];
		r5 ^= p5[5];
		r6 ^= p5[6];
		r7 ^= p5[7];

		/* Write the result back. */
		p1[0] = r0;
		p1[1] = r1;
		p1[2] = r2;
		p1[3] = r3;
		p1[4] = r4;
		p1[5] = r5;
		p1[6] = r6;
		p1[7] = r7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;

		if (--todo <= 0)
			break;
	}
}
309 | |
/*
 * xor_8regs_p_2 - XOR @p2 into @p1 in place, prefetching one line ahead.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 *
 * Data is handled in "lines" of eight longs.  prefetchw()/prefetch() come
 * from <linux/prefetch.h> (presumably write/read cache hints) and are
 * issued for the line after the one about to be XORed.
 *
 * @lines starts one short of the line count.  The do/while therefore stops
 * with the last line still pending, and that line is handled by jumping to
 * once_more, which skips the prefetch calls.  With two or more lines the
 * loop never prefetches beyond the end of the buffers; with a single line
 * (or less) the loop body runs once, prefetch included.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
once_more:
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/* Taken at most once: the re-entry drops lines to -1. */
	if (lines == 0)
		goto once_more;
}
336 | |
/*
 * xor_8regs_p_3 - XOR @p2 and @p3 into @p1 in place, prefetching ahead.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 *
 * Data is handled in "lines" of eight longs.  prefetchw()/prefetch() come
 * from <linux/prefetch.h> (presumably write/read cache hints) and are
 * issued for the line after the one about to be XORed.
 *
 * @lines starts one short of the line count.  The do/while therefore stops
 * with the last line still pending, and that line is handled by jumping to
 * once_more, which skips the prefetch calls.  With two or more lines the
 * loop never prefetches beyond the end of the buffers; with a single line
 * (or less) the loop body runs once, prefetch included.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
once_more:
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/* Taken at most once: the re-entry drops lines to -1. */
	if (lines == 0)
		goto once_more;
}
367 | |
/*
 * xor_8regs_p_4 - XOR @p2..@p4 into @p1 in place, prefetching ahead.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 *
 * Data is handled in "lines" of eight longs.  prefetchw()/prefetch() come
 * from <linux/prefetch.h> (presumably write/read cache hints) and are
 * issued for the line after the one about to be XORed.
 *
 * @lines starts one short of the line count.  The do/while therefore stops
 * with the last line still pending, and that line is handled by jumping to
 * once_more, which skips the prefetch calls.  With two or more lines the
 * loop never prefetches beyond the end of the buffers; with a single line
 * (or less) the loop body runs once, prefetch included.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3,
	      const unsigned long * __restrict p4)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/* Taken at most once: the re-entry drops lines to -1. */
	if (lines == 0)
		goto once_more;
}
403 | |
/*
 * xor_8regs_p_5 - XOR @p2..@p5 into @p1 in place, prefetching ahead.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 * @p5:    fifth operand (only read)
 *
 * Data is handled in "lines" of eight longs.  prefetchw()/prefetch() come
 * from <linux/prefetch.h> (presumably write/read cache hints) and are
 * issued for the line after the one about to be XORed.
 *
 * @lines starts one short of the line count.  The do/while therefore stops
 * with the last line still pending, and that line is handled by jumping to
 * once_more, which skips the prefetch calls.  With two or more lines the
 * loop never prefetches beyond the end of the buffers; with a single line
 * (or less) the loop body runs once, prefetch included.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3,
	      const unsigned long * __restrict p4,
	      const unsigned long * __restrict p5)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/* Taken at most once: the re-entry drops lines to -1. */
	if (lines == 0)
		goto once_more;
}
443 | |
/*
 * xor_32regs_p_2 - XOR @p2 into @p1 in place, prefetching one line ahead.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 *
 * Each pass copies one "line" of eight longs from @p1 into locals, folds
 * the matching line of @p2 into them and writes the locals back.
 * prefetchw()/prefetch() come from <linux/prefetch.h> (presumably
 * write/read cache hints) and are issued for the following line.
 *
 * @lines starts one short of the line count.  The do/while therefore stops
 * with the last line still pending, and that line is handled by jumping to
 * once_more, which skips the prefetch calls.  With two or more lines the
 * loop never prefetches beyond the end of the buffers; with a single line
 * (or less) the loop body runs once, prefetch included.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);

	do {
		register long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/* Taken at most once: the re-entry drops lines to -1. */
	if (lines == 0)
		goto once_more;
}
489 | |
/*
 * xor_32regs_p_3 - XOR @p2 and @p3 into @p1 in place, prefetching ahead.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 *
 * Each pass copies one "line" of eight longs from @p1 into locals, folds
 * the matching line of every source into them and writes the locals back.
 * prefetchw()/prefetch() come from <linux/prefetch.h> (presumably
 * write/read cache hints) and are issued for the following line.
 *
 * @lines starts one short of the line count.  The do/while therefore stops
 * with the last line still pending, and that line is handled by jumping to
 * once_more, which skips the prefetch calls.  With two or more lines the
 * loop never prefetches beyond the end of the buffers; with a single line
 * (or less) the loop body runs once, prefetch included.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		register long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/* Taken at most once: the re-entry drops lines to -1. */
	if (lines == 0)
		goto once_more;
}
547 | |
/*
 * xor_32regs_p_4 - XOR @p2..@p4 into @p1 in place, prefetching ahead.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 *
 * Each pass copies one "line" of eight longs from @p1 into locals, folds
 * the matching line of every source into them and writes the locals back.
 * prefetchw()/prefetch() come from <linux/prefetch.h> (presumably
 * write/read cache hints) and are issued for the following line.
 *
 * @lines starts one short of the line count.  The do/while therefore stops
 * with the last line still pending, and that line is handled by jumping to
 * once_more, which skips the prefetch calls.  With two or more lines the
 * loop never prefetches beyond the end of the buffers; with a single line
 * (or less) the loop body runs once, prefetch included.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		register long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/* Taken at most once: the re-entry drops lines to -1. */
	if (lines == 0)
		goto once_more;
}
617 | |
/*
 * xor_32regs_p_5 - XOR @p2..@p5 into @p1 in place, prefetching ahead.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination; also the first operand
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 * @p5:    fifth operand (only read)
 *
 * Each pass copies one "line" of eight longs from @p1 into locals, folds
 * the matching line of every source into them and writes the locals back.
 * prefetchw()/prefetch() come from <linux/prefetch.h> (presumably
 * write/read cache hints) and are issued for the following line.
 *
 * @lines starts one short of the line count.  The do/while therefore stops
 * with the last line still pending, and that line is handled by jumping to
 * once_more, which skips the prefetch calls.  With two or more lines the
 * loop never prefetches beyond the end of the buffers; with a single line
 * (or less) the loop body runs once, prefetch included.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4,
	       const unsigned long * __restrict p5)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		register long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/* Taken at most once: the re-entry drops lines to -1. */
	if (lines == 0)
		goto once_more;
}
699 | |
700 | static struct xor_block_template xor_block_8regs = { |
701 | .name = "8regs" , |
702 | .do_2 = xor_8regs_2, |
703 | .do_3 = xor_8regs_3, |
704 | .do_4 = xor_8regs_4, |
705 | .do_5 = xor_8regs_5, |
706 | }; |
707 | |
708 | static struct xor_block_template xor_block_32regs = { |
709 | .name = "32regs" , |
710 | .do_2 = xor_32regs_2, |
711 | .do_3 = xor_32regs_3, |
712 | .do_4 = xor_32regs_4, |
713 | .do_5 = xor_32regs_5, |
714 | }; |
715 | |
716 | static struct xor_block_template xor_block_8regs_p __maybe_unused = { |
717 | .name = "8regs_prefetch" , |
718 | .do_2 = xor_8regs_p_2, |
719 | .do_3 = xor_8regs_p_3, |
720 | .do_4 = xor_8regs_p_4, |
721 | .do_5 = xor_8regs_p_5, |
722 | }; |
723 | |
724 | static struct xor_block_template xor_block_32regs_p __maybe_unused = { |
725 | .name = "32regs_prefetch" , |
726 | .do_2 = xor_32regs_p_2, |
727 | .do_3 = xor_32regs_p_3, |
728 | .do_4 = xor_32regs_p_4, |
729 | .do_5 = xor_32regs_p_5, |
730 | }; |
731 | |
/*
 * Pass each generic template to xor_speed() in turn.  xor_speed() is
 * defined elsewhere; presumably it benchmarks the template - confirm
 * against its definition.
 */
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	} while (0)
739 | |