1/* memcpy with SSSE3 and REP string
2 Copyright (C) 2010-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20
21#if IS_IN (libc)
22
23#include "asm-syntax.h"
24
/* Default symbol names for this implementation.  They are only
   applied when MEMCPY has not been defined before this point.  */
#ifndef MEMCPY
# define MEMCPY		__memcpy_ssse3_back
# define MEMCPY_CHK	__memcpy_chk_ssse3_back
# define MEMPCPY	__mempcpy_ssse3_back
# define MEMPCPY_CHK	__mempcpy_chk_ssse3_back
#endif
31
/* One jump-table entry: label I expressed as an offset from base B.  */
#define JMPTBL(I, B)	I - B

/* Branch to an entry in a jump table.  TABLE is a jump table whose
   entries are 32-bit offsets relative to TABLE itself.  INDEX is the
   register that contains the index into the jump table.  SCALE is the
   scale applied to INDEX.  Both %r11 and INDEX are overwritten.  The
   ud2 follows an unconditional jump and is never reached by normal
   control flow.  */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
  lea		TABLE(%rip), %r11;				\
  movslq	(%r11, INDEX, SCALE), INDEX;			\
  lea		(%r11, INDEX), INDEX;				\
  _CET_NOTRACK jmp *INDEX;					\
  ud2
43
44 .section .text.ssse3,"ax",@progbits
45#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
/* Checked mempcpy entry.  Branches to __chk_fail when %rcx is below
   the length in %rdx (unsigned compare); %rcx is presumably the size
   of the destination object.  Otherwise execution continues with the
   code placed after END.  */
ENTRY (MEMPCPY_CHK)
	cmp	%RDX_LP, %RCX_LP
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
50
/* mempcpy entry.  Sets the return value in %rax to dst + len and then
   joins the common copy code at L(start).  L(start) lies after the
   point where the main entry clears the upper half of %rdx for
   __ILP32__, so the length is zero-extended here as well; without
   that the 64-bit compares on %rdx at L(start) would see whatever the
   caller left in the upper 32 bits.  */
ENTRY (MEMPCPY)
	mov	%RDI_LP, %RAX_LP
	add	%RDX_LP, %RAX_LP
#ifdef __ILP32__
	/* Clear the upper 32 bits.  */
	mov	%edx, %edx
#endif
	jmp	L(start)
END (MEMPCPY)
56#endif
57
58#if !defined USE_AS_BCOPY
/* Checked entry for the main routine.  Branches to __chk_fail when
   %rcx is below the length in %rdx (unsigned compare); %rcx is
   presumably the size of the destination object.  Otherwise execution
   continues with the code placed after END.  */
ENTRY (MEMCPY_CHK)
	cmp	%RDX_LP, %RCX_LP
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (MEMCPY_CHK)
63#endif
64
/* Main entry.  %rdi = destination, %rsi = source, %rdx = length.
   %rax receives the return value: the destination, or destination
   plus length when built as mempcpy.  */
ENTRY (MEMCPY)
	mov	%RDI_LP, %RAX_LP
#ifdef USE_AS_MEMPCPY
	add	%RDX_LP, %RAX_LP
#endif

#ifdef __ILP32__
	/* Clear the upper 32 bits.  */
	mov	%edx, %edx
#endif

#ifdef USE_AS_MEMMOVE
	/* Destination below source: copy forward.  Same address: leave
	   through L(bwd_write_0bytes).  Destination above source: copy
	   backward, through the backward table when under 144 bytes.  */
	cmp	%rsi, %rdi
	jb	L(copy_forward)
	je	L(bwd_write_0bytes)
	cmp	$144, %rdx
	jae	L(copy_backward)
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
L(copy_forward):
#endif
/* Common start.  Lengths of 144 bytes or more go to the block loops;
   anything shorter is handed to a jump table indexed by the length.  */
L(start):
	cmp	$144, %rdx
	jae	L(144bytesormore)

L(fwd_write_less32bytes):
#ifndef USE_AS_MEMMOVE
	/* Choose the table from an unsigned compare of the low address
	   bytes of source and destination.  */
	cmp	%dil, %sil
	jbe	L(bk_write)
#endif
	/* The forward table is entered with both pointers advanced by
	   the length.  */
	add	%rdx, %rsi
	add	%rdx, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
#ifndef USE_AS_MEMMOVE
L(bk_write):

	/* The backward table is entered with the pointers untouched.  */
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
102
/* Forward set-up for 144 bytes or more.  The first 16 source bytes
   are kept in %xmm0 and the original destination in %r8 so the head
   can be written after the aligned loop has finished.  */
	.p2align 4
L(144bytesormore):

#ifndef USE_AS_MEMMOVE
	/* NOTE(review): this is a signed jle on the low address bytes,
	   while the short path at L(fwd_write_less32bytes) uses the
	   unsigned jbe.  It only selects the copy direction -- confirm
	   the difference is intended.  */
	cmp	%dil, %sil
	jle	L(copy_backward)
#endif
	movdqu	(%rsi), %xmm0
	mov	%rdi, %r8
	/* Round the destination up to the next 16-byte boundary and move
	   source and length by the same distance (%r9).  */
	and	$-16, %rdi
	add	$16, %rdi
	mov	%rdi, %r9
	sub	%r8, %r9
	sub	%r9, %rdx
	add	%r9, %rsi
	/* %r9 = source offset within its 16-byte line.  */
	mov	%rsi, %r9
	and	$0xf, %r9
	jz	L(shl_0)
#ifdef DATA_CACHE_SIZE
	mov	$DATA_CACHE_SIZE, %RCX_LP
#else
	mov	__x86_data_cache_size(%rip), %RCX_LP
#endif
	cmp	%rcx, %rdx
	jae	L(gobble_mem_fwd)
	/* Dispatch on the source offset.  The length is biased by -0x80
	   before the jump.  */
	lea	L(shl_table_fwd)(%rip), %r11
	sub	$0x80, %rdx
	movslq	(%r11, %r9, 4), %r9
	add	%r11, %r9
	_CET_NOTRACK jmp *%r9
	ud2
134
/* Backward set-up.  Lengths above twice the data cache size go to
   L(gobble_mem_bwd).  Otherwise the last 16 source bytes are kept in
   %xmm0 and their destination address in %r8 so the tail can be
   written after the aligned loop has finished.  */
	.p2align 4
L(copy_backward):
#ifdef DATA_CACHE_SIZE
	mov	$DATA_CACHE_SIZE, %RCX_LP
#else
	mov	__x86_data_cache_size(%rip), %RCX_LP
#endif
	shl	$1, %rcx
	cmp	%rcx, %rdx
	ja	L(gobble_mem_bwd)

	/* Move both pointers to the end of their buffers.  */
	add	%rdx, %rdi
	add	%rdx, %rsi
	movdqu	-16(%rsi), %xmm0
	lea	-16(%rdi), %r8
	/* Clear the low four bits of the destination end and pull the
	   source end and the length back by the same amount (%r9).  */
	mov	%rdi, %r9
	and	$0xf, %r9
	xor	%r9, %rdi
	sub	%r9, %rsi
	sub	%r9, %rdx
	/* %r9 = source offset within its 16-byte line.  */
	mov	%rsi, %r9
	and	$0xf, %r9
	jz	L(shl_0_bwd)
	/* Dispatch on the source offset.  The length is biased by -0x80
	   before the jump.  */
	lea	L(shl_table_bwd)(%rip), %r11
	sub	$0x80, %rdx
	movslq	(%r11, %r9, 4), %r9
	add	%r11, %r9
	_CET_NOTRACK jmp *%r9
	ud2
164
/* Forward copy using aligned moves on both pointers.  When
   len + len / 256 reaches half the data cache size the copy is left
   to L(gobble_mem_fwd); otherwise 128 bytes are moved per pass.  */
	.p2align 4
L(shl_0):

	mov	%rdx, %r9
	shr	$8, %r9
	add	%rdx, %r9
#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE_HALF, %R9_LP
#else
	cmp	__x86_data_cache_size_half(%rip), %R9_LP
#endif
	jae	L(gobble_mem_fwd)
	sub	$0x80, %rdx
	.p2align 4
L(shl_0_loop):
	movdqa	(%rsi), %xmm1
	movdqa	%xmm1, (%rdi)
	movaps	0x10(%rsi), %xmm2
	movaps	%xmm2, 0x10(%rdi)
	movaps	0x20(%rsi), %xmm3
	movaps	%xmm3, 0x20(%rdi)
	movaps	0x30(%rsi), %xmm4
	movaps	%xmm4, 0x30(%rdi)
	movaps	0x40(%rsi), %xmm1
	movaps	%xmm1, 0x40(%rdi)
	movaps	0x50(%rsi), %xmm2
	movaps	%xmm2, 0x50(%rdi)
	movaps	0x60(%rsi), %xmm3
	movaps	%xmm3, 0x60(%rdi)
	movaps	0x70(%rsi), %xmm4
	movaps	%xmm4, 0x70(%rdi)
	/* The two lea instructions leave the flags from sub intact for
	   the jae.  */
	sub	$0x80, %rdx
	lea	0x80(%rsi), %rsi
	lea	0x80(%rdi), %rdi
	jae	L(shl_0_loop)
	/* Write the saved head, undo the length bias and finish the
	   remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rsi
	add	%rdx, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
205
/* Backward copy using aligned moves on both pointers, 128 bytes per
   pass, working down from the addresses in %rsi and %rdi.  */
	.p2align 4
L(shl_0_bwd):
	sub	$0x80, %rdx
L(copy_backward_loop):
	movaps	-0x10(%rsi), %xmm1
	movaps	%xmm1, -0x10(%rdi)
	movaps	-0x20(%rsi), %xmm2
	movaps	%xmm2, -0x20(%rdi)
	movaps	-0x30(%rsi), %xmm3
	movaps	%xmm3, -0x30(%rdi)
	movaps	-0x40(%rsi), %xmm4
	movaps	%xmm4, -0x40(%rdi)
	movaps	-0x50(%rsi), %xmm5
	movaps	%xmm5, -0x50(%rdi)
	movaps	-0x60(%rsi), %xmm5
	movaps	%xmm5, -0x60(%rdi)
	movaps	-0x70(%rsi), %xmm5
	movaps	%xmm5, -0x70(%rdi)
	movaps	-0x80(%rsi), %xmm5
	movaps	%xmm5, -0x80(%rdi)
	/* The two lea instructions leave the flags from sub intact for
	   the jae.  */
	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(copy_backward_loop)

	/* Write the saved tail, undo the length bias and finish the
	   remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
236
/* Forward 128-byte loop for a source 1 byte above a 16-byte boundary
   (the movaps at -0x01(%rsi) needs that alignment).  All nine loads
   are issued before the first store.  Only the sub at the top writes
   flags ahead of the jae; lea, palignr and movaps do not.  */
	.p2align 4
L(shl_1):
	sub	$0x80, %rdx
	movaps	-0x01(%rsi), %xmm1
	movaps	0x0f(%rsi), %xmm2
	movaps	0x1f(%rsi), %xmm3
	movaps	0x2f(%rsi), %xmm4
	movaps	0x3f(%rsi), %xmm5
	movaps	0x4f(%rsi), %xmm6
	movaps	0x5f(%rsi), %xmm7
	movaps	0x6f(%rsi), %xmm8
	movaps	0x7f(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$1, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$1, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$1, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$1, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$1, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$1, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$1, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$1, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_1)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
273
/* Backward 128-byte loop for a source 1 byte above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $1 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_1_bwd):
	movaps	-0x01(%rsi), %xmm1

	movaps	-0x11(%rsi), %xmm2
	palignr	$1, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x21(%rsi), %xmm3
	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x31(%rsi), %xmm4
	palignr	$1, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x41(%rsi), %xmm5
	palignr	$1, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x51(%rsi), %xmm6
	palignr	$1, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x61(%rsi), %xmm7
	palignr	$1, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x71(%rsi), %xmm8
	palignr	$1, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x81(%rsi), %xmm9
	palignr	$1, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_1_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
319
/* Forward 128-byte loop for a source 2 bytes above a 16-byte boundary
   (the movaps at -0x02(%rsi) needs that alignment).  All nine loads
   are issued before the first store.  Only the sub at the top writes
   flags ahead of the jae; lea, palignr and movaps do not.  */
	.p2align 4
L(shl_2):
	sub	$0x80, %rdx
	movaps	-0x02(%rsi), %xmm1
	movaps	0x0e(%rsi), %xmm2
	movaps	0x1e(%rsi), %xmm3
	movaps	0x2e(%rsi), %xmm4
	movaps	0x3e(%rsi), %xmm5
	movaps	0x4e(%rsi), %xmm6
	movaps	0x5e(%rsi), %xmm7
	movaps	0x6e(%rsi), %xmm8
	movaps	0x7e(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$2, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$2, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$2, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$2, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$2, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$2, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$2, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_2)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
356
/* Backward 128-byte loop for a source 2 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $2 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_2_bwd):
	movaps	-0x02(%rsi), %xmm1

	movaps	-0x12(%rsi), %xmm2
	palignr	$2, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x22(%rsi), %xmm3
	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x32(%rsi), %xmm4
	palignr	$2, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x42(%rsi), %xmm5
	palignr	$2, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x52(%rsi), %xmm6
	palignr	$2, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x62(%rsi), %xmm7
	palignr	$2, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x72(%rsi), %xmm8
	palignr	$2, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x82(%rsi), %xmm9
	palignr	$2, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_2_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
402
/* Forward 128-byte loop for a source 3 bytes above a 16-byte boundary
   (the movaps at -0x03(%rsi) needs that alignment).  All nine loads
   are issued before the first store.  Only the sub at the top writes
   flags ahead of the jae; lea, palignr and movaps do not.  */
	.p2align 4
L(shl_3):
	sub	$0x80, %rdx
	movaps	-0x03(%rsi), %xmm1
	movaps	0x0d(%rsi), %xmm2
	movaps	0x1d(%rsi), %xmm3
	movaps	0x2d(%rsi), %xmm4
	movaps	0x3d(%rsi), %xmm5
	movaps	0x4d(%rsi), %xmm6
	movaps	0x5d(%rsi), %xmm7
	movaps	0x6d(%rsi), %xmm8
	movaps	0x7d(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$3, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$3, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$3, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$3, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$3, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$3, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$3, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_3)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
439
/* Backward 128-byte loop for a source 3 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $3 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_3_bwd):
	movaps	-0x03(%rsi), %xmm1

	movaps	-0x13(%rsi), %xmm2
	palignr	$3, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x23(%rsi), %xmm3
	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x33(%rsi), %xmm4
	palignr	$3, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x43(%rsi), %xmm5
	palignr	$3, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x53(%rsi), %xmm6
	palignr	$3, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x63(%rsi), %xmm7
	palignr	$3, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x73(%rsi), %xmm8
	palignr	$3, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x83(%rsi), %xmm9
	palignr	$3, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_3_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
485
/* Forward 128-byte loop for a source 4 bytes above a 16-byte boundary
   (the movaps at -0x04(%rsi) needs that alignment).  All nine loads
   are issued before the first store.  Only the sub at the top writes
   flags ahead of the jae; lea, palignr and movaps do not.  */
	.p2align 4
L(shl_4):
	sub	$0x80, %rdx
	movaps	-0x04(%rsi), %xmm1
	movaps	0x0c(%rsi), %xmm2
	movaps	0x1c(%rsi), %xmm3
	movaps	0x2c(%rsi), %xmm4
	movaps	0x3c(%rsi), %xmm5
	movaps	0x4c(%rsi), %xmm6
	movaps	0x5c(%rsi), %xmm7
	movaps	0x6c(%rsi), %xmm8
	movaps	0x7c(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$4, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$4, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$4, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$4, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$4, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$4, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$4, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_4)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
522
/* Backward 128-byte loop for a source 4 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $4 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_4_bwd):
	movaps	-0x04(%rsi), %xmm1

	movaps	-0x14(%rsi), %xmm2
	palignr	$4, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x24(%rsi), %xmm3
	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x34(%rsi), %xmm4
	palignr	$4, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x44(%rsi), %xmm5
	palignr	$4, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x54(%rsi), %xmm6
	palignr	$4, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x64(%rsi), %xmm7
	palignr	$4, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x74(%rsi), %xmm8
	palignr	$4, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x84(%rsi), %xmm9
	palignr	$4, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_4_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
568
/* Forward 128-byte loop for a source 5 bytes above a 16-byte boundary
   (the movaps at -0x05(%rsi) needs that alignment).  All nine loads
   are issued before the first store.  Only the sub at the top writes
   flags ahead of the jae; lea, palignr and movaps do not.  */
	.p2align 4
L(shl_5):
	sub	$0x80, %rdx
	movaps	-0x05(%rsi), %xmm1
	movaps	0x0b(%rsi), %xmm2
	movaps	0x1b(%rsi), %xmm3
	movaps	0x2b(%rsi), %xmm4
	movaps	0x3b(%rsi), %xmm5
	movaps	0x4b(%rsi), %xmm6
	movaps	0x5b(%rsi), %xmm7
	movaps	0x6b(%rsi), %xmm8
	movaps	0x7b(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$5, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$5, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$5, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$5, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$5, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$5, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$5, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_5)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
605
/* Backward 128-byte loop for a source 5 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $5 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_5_bwd):
	movaps	-0x05(%rsi), %xmm1

	movaps	-0x15(%rsi), %xmm2
	palignr	$5, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x25(%rsi), %xmm3
	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x35(%rsi), %xmm4
	palignr	$5, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x45(%rsi), %xmm5
	palignr	$5, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x55(%rsi), %xmm6
	palignr	$5, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x65(%rsi), %xmm7
	palignr	$5, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x75(%rsi), %xmm8
	palignr	$5, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x85(%rsi), %xmm9
	palignr	$5, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_5_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
651
/* Forward 128-byte loop for a source 6 bytes above a 16-byte boundary
   (the movaps at -0x06(%rsi) needs that alignment).  All nine loads
   are issued before the first store.  Only the sub at the top writes
   flags ahead of the jae; lea, palignr and movaps do not.  */
	.p2align 4
L(shl_6):
	sub	$0x80, %rdx
	movaps	-0x06(%rsi), %xmm1
	movaps	0x0a(%rsi), %xmm2
	movaps	0x1a(%rsi), %xmm3
	movaps	0x2a(%rsi), %xmm4
	movaps	0x3a(%rsi), %xmm5
	movaps	0x4a(%rsi), %xmm6
	movaps	0x5a(%rsi), %xmm7
	movaps	0x6a(%rsi), %xmm8
	movaps	0x7a(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$6, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$6, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$6, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$6, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$6, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$6, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$6, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_6)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
688
/* Backward 128-byte loop for a source 6 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $6 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_6_bwd):
	movaps	-0x06(%rsi), %xmm1

	movaps	-0x16(%rsi), %xmm2
	palignr	$6, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x26(%rsi), %xmm3
	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x36(%rsi), %xmm4
	palignr	$6, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x46(%rsi), %xmm5
	palignr	$6, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x56(%rsi), %xmm6
	palignr	$6, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x66(%rsi), %xmm7
	palignr	$6, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x76(%rsi), %xmm8
	palignr	$6, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x86(%rsi), %xmm9
	palignr	$6, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_6_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
734
/* Forward 128-byte loop for a source 7 bytes above a 16-byte boundary
   (the movaps at -0x07(%rsi) needs that alignment).  All nine loads
   are issued before the first store.  Only the sub at the top writes
   flags ahead of the jae; lea, palignr and movaps do not.  */
	.p2align 4
L(shl_7):
	sub	$0x80, %rdx
	movaps	-0x07(%rsi), %xmm1
	movaps	0x09(%rsi), %xmm2
	movaps	0x19(%rsi), %xmm3
	movaps	0x29(%rsi), %xmm4
	movaps	0x39(%rsi), %xmm5
	movaps	0x49(%rsi), %xmm6
	movaps	0x59(%rsi), %xmm7
	movaps	0x69(%rsi), %xmm8
	movaps	0x79(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$7, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$7, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$7, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$7, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$7, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$7, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$7, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_7)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
771
/* Backward 128-byte loop for a source 7 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $7 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_7_bwd):
	movaps	-0x07(%rsi), %xmm1

	movaps	-0x17(%rsi), %xmm2
	palignr	$7, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x27(%rsi), %xmm3
	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x37(%rsi), %xmm4
	palignr	$7, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x47(%rsi), %xmm5
	palignr	$7, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x57(%rsi), %xmm6
	palignr	$7, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x67(%rsi), %xmm7
	palignr	$7, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x77(%rsi), %xmm8
	palignr	$7, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x87(%rsi), %xmm9
	palignr	$7, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_7_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
817
/* Forward 128-byte loop for a source 8 bytes above a 16-byte boundary
   (the movaps at -0x08(%rsi) needs that alignment).  All nine loads
   are issued before the first store.  Only the sub at the top writes
   flags ahead of the jae; lea, palignr and movaps do not.  */
	.p2align 4
L(shl_8):
	sub	$0x80, %rdx
	movaps	-0x08(%rsi), %xmm1
	movaps	0x08(%rsi), %xmm2
	movaps	0x18(%rsi), %xmm3
	movaps	0x28(%rsi), %xmm4
	movaps	0x38(%rsi), %xmm5
	movaps	0x48(%rsi), %xmm6
	movaps	0x58(%rsi), %xmm7
	movaps	0x68(%rsi), %xmm8
	movaps	0x78(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$8, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$8, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$8, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$8, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$8, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$8, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$8, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_8)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
854
/* Backward 128-byte loop for a source 8 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $8 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_8_bwd):
	movaps	-0x08(%rsi), %xmm1

	movaps	-0x18(%rsi), %xmm2
	palignr	$8, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x28(%rsi), %xmm3
	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x38(%rsi), %xmm4
	palignr	$8, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x48(%rsi), %xmm5
	palignr	$8, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x58(%rsi), %xmm6
	palignr	$8, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x68(%rsi), %xmm7
	palignr	$8, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x78(%rsi), %xmm8
	palignr	$8, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x88(%rsi), %xmm9
	palignr	$8, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_8_bwd)
L(shl_8_end_bwd):
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
901
/* Forward 128-byte loop for a source 9 bytes above a 16-byte boundary
   (the movaps at -0x09(%rsi) needs that alignment).  All nine loads
   are issued before the first store.  Only the sub at the top writes
   flags ahead of the jae; lea, palignr and movaps do not.  */
	.p2align 4
L(shl_9):
	sub	$0x80, %rdx
	movaps	-0x09(%rsi), %xmm1
	movaps	0x07(%rsi), %xmm2
	movaps	0x17(%rsi), %xmm3
	movaps	0x27(%rsi), %xmm4
	movaps	0x37(%rsi), %xmm5
	movaps	0x47(%rsi), %xmm6
	movaps	0x57(%rsi), %xmm7
	movaps	0x67(%rsi), %xmm8
	movaps	0x77(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$9, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$9, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$9, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$9, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$9, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$9, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$9, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_9)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
938
/* Backward 128-byte loop for a source 9 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $9 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_9_bwd):
	movaps	-0x09(%rsi), %xmm1

	movaps	-0x19(%rsi), %xmm2
	palignr	$9, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x29(%rsi), %xmm3
	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x39(%rsi), %xmm4
	palignr	$9, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x49(%rsi), %xmm5
	palignr	$9, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x59(%rsi), %xmm6
	palignr	$9, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x69(%rsi), %xmm7
	palignr	$9, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x79(%rsi), %xmm8
	palignr	$9, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x89(%rsi), %xmm9
	palignr	$9, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_9_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
984
/* Forward 128-byte loop for a source 10 bytes above a 16-byte
   boundary (the movaps at -0x0a(%rsi) needs that alignment).  All
   nine loads are issued before the first store.  Only the sub at the
   top writes flags ahead of the jae; lea, palignr and movaps do
   not.  */
	.p2align 4
L(shl_10):
	sub	$0x80, %rdx
	movaps	-0x0a(%rsi), %xmm1
	movaps	0x06(%rsi), %xmm2
	movaps	0x16(%rsi), %xmm3
	movaps	0x26(%rsi), %xmm4
	movaps	0x36(%rsi), %xmm5
	movaps	0x46(%rsi), %xmm6
	movaps	0x56(%rsi), %xmm7
	movaps	0x66(%rsi), %xmm8
	movaps	0x76(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$10, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$10, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$10, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$10, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$10, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$10, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$10, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_10)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1021
/* Backward 128-byte loop for a source 10 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $10 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_10_bwd):
	movaps	-0x0a(%rsi), %xmm1

	movaps	-0x1a(%rsi), %xmm2
	palignr	$10, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x2a(%rsi), %xmm3
	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x3a(%rsi), %xmm4
	palignr	$10, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x4a(%rsi), %xmm5
	palignr	$10, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x5a(%rsi), %xmm6
	palignr	$10, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x6a(%rsi), %xmm7
	palignr	$10, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x7a(%rsi), %xmm8
	palignr	$10, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x8a(%rsi), %xmm9
	palignr	$10, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_10_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1067
/* Forward 128-byte loop for a source 11 bytes above a 16-byte
   boundary (the movaps at -0x0b(%rsi) needs that alignment).  All
   nine loads are issued before the first store.  Only the sub at the
   top writes flags ahead of the jae; lea, palignr and movaps do
   not.  */
	.p2align 4
L(shl_11):
	sub	$0x80, %rdx
	movaps	-0x0b(%rsi), %xmm1
	movaps	0x05(%rsi), %xmm2
	movaps	0x15(%rsi), %xmm3
	movaps	0x25(%rsi), %xmm4
	movaps	0x35(%rsi), %xmm5
	movaps	0x45(%rsi), %xmm6
	movaps	0x55(%rsi), %xmm7
	movaps	0x65(%rsi), %xmm8
	movaps	0x75(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$11, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$11, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$11, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$11, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$11, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$11, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$11, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_11)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1104
/* Backward 128-byte loop for a source 11 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $11 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_11_bwd):
	movaps	-0x0b(%rsi), %xmm1

	movaps	-0x1b(%rsi), %xmm2
	palignr	$11, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x2b(%rsi), %xmm3
	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x3b(%rsi), %xmm4
	palignr	$11, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x4b(%rsi), %xmm5
	palignr	$11, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x5b(%rsi), %xmm6
	palignr	$11, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x6b(%rsi), %xmm7
	palignr	$11, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x7b(%rsi), %xmm8
	palignr	$11, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x8b(%rsi), %xmm9
	palignr	$11, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_11_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1150
/* Forward 128-byte loop for a source 12 bytes above a 16-byte
   boundary (the aligned load at -0x0c(%rsi) needs that alignment).
   All nine loads are issued before the first store.  Only the sub at
   the top writes flags ahead of the jae; lea, palignr and the vector
   moves do not.  */
	.p2align 4
L(shl_12):
	sub	$0x80, %rdx
	movdqa	-0x0c(%rsi), %xmm1
	movaps	0x04(%rsi), %xmm2
	movaps	0x14(%rsi), %xmm3
	movaps	0x24(%rsi), %xmm4
	movaps	0x34(%rsi), %xmm5
	movaps	0x44(%rsi), %xmm6
	movaps	0x54(%rsi), %xmm7
	movaps	0x64(%rsi), %xmm8
	movaps	0x74(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$12, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$12, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$12, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$12, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$12, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$12, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$12, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)

	lea	0x80(%rdi), %rdi
	jae	L(shl_12)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1188
/* Backward 128-byte loop for a source 12 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $12 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_12_bwd):
	movaps	-0x0c(%rsi), %xmm1

	movaps	-0x1c(%rsi), %xmm2
	palignr	$12, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x2c(%rsi), %xmm3
	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x3c(%rsi), %xmm4
	palignr	$12, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x4c(%rsi), %xmm5
	palignr	$12, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x5c(%rsi), %xmm6
	palignr	$12, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x6c(%rsi), %xmm7
	palignr	$12, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x7c(%rsi), %xmm8
	palignr	$12, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x8c(%rsi), %xmm9
	palignr	$12, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_12_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1234
/* Forward 128-byte loop for a source 13 bytes above a 16-byte
   boundary (the movaps at -0x0d(%rsi) needs that alignment).  All
   nine loads are issued before the first store.  Only the sub at the
   top writes flags ahead of the jae; lea, palignr and movaps do
   not.  */
	.p2align 4
L(shl_13):
	sub	$0x80, %rdx
	movaps	-0x0d(%rsi), %xmm1
	movaps	0x03(%rsi), %xmm2
	movaps	0x13(%rsi), %xmm3
	movaps	0x23(%rsi), %xmm4
	movaps	0x33(%rsi), %xmm5
	movaps	0x43(%rsi), %xmm6
	movaps	0x53(%rsi), %xmm7
	movaps	0x63(%rsi), %xmm8
	movaps	0x73(%rsi), %xmm9
	lea	0x80(%rsi), %rsi
	palignr	$13, %xmm8, %xmm9
	movaps	%xmm9, 0x70(%rdi)
	palignr	$13, %xmm7, %xmm8
	movaps	%xmm8, 0x60(%rdi)
	palignr	$13, %xmm6, %xmm7
	movaps	%xmm7, 0x50(%rdi)
	palignr	$13, %xmm5, %xmm6
	movaps	%xmm6, 0x40(%rdi)
	palignr	$13, %xmm4, %xmm5
	movaps	%xmm5, 0x30(%rdi)
	palignr	$13, %xmm3, %xmm4
	movaps	%xmm4, 0x20(%rdi)
	palignr	$13, %xmm2, %xmm3
	movaps	%xmm3, 0x10(%rdi)
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	lea	0x80(%rdi), %rdi
	jae	L(shl_13)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the forward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	add	%rdx, %rdi
	add	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1271
/* Backward 128-byte loop for a source 13 bytes above a 16-byte
   boundary.  Works down from %rsi and %rdi; each aligned load is
   merged with its neighbour by palignr $13 before the aligned store.
   The sub ahead of the two lea instructions sets the flags tested by
   the jae.  */
	.p2align 4
L(shl_13_bwd):
	movaps	-0x0d(%rsi), %xmm1

	movaps	-0x1d(%rsi), %xmm2
	palignr	$13, %xmm2, %xmm1
	movaps	%xmm1, -0x10(%rdi)

	movaps	-0x2d(%rsi), %xmm3
	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, -0x20(%rdi)

	movaps	-0x3d(%rsi), %xmm4
	palignr	$13, %xmm4, %xmm3
	movaps	%xmm3, -0x30(%rdi)

	movaps	-0x4d(%rsi), %xmm5
	palignr	$13, %xmm5, %xmm4
	movaps	%xmm4, -0x40(%rdi)

	movaps	-0x5d(%rsi), %xmm6
	palignr	$13, %xmm6, %xmm5
	movaps	%xmm5, -0x50(%rdi)

	movaps	-0x6d(%rsi), %xmm7
	palignr	$13, %xmm7, %xmm6
	movaps	%xmm6, -0x60(%rdi)

	movaps	-0x7d(%rsi), %xmm8
	palignr	$13, %xmm8, %xmm7
	movaps	%xmm7, -0x70(%rdi)

	movaps	-0x8d(%rsi), %xmm9
	palignr	$13, %xmm9, %xmm8
	movaps	%xmm8, -0x80(%rdi)

	sub	$0x80, %rdx
	lea	-0x80(%rdi), %rdi
	lea	-0x80(%rsi), %rsi
	jae	L(shl_13_bwd)
	/* Write the vector saved in %xmm0, undo the length bias and
	   finish the remainder through the backward table.  */
	movdqu	%xmm0, (%r8)
	add	$0x80, %rdx
	sub	%rdx, %rdi
	sub	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1317
/* Forward copy loop, 128 bytes per iteration, for a source that sits
   14 bytes past the aligned address it is read from.  Nine 16-byte
   vectors are read with aligned loads starting at -14(%rsi); palignr
   $14 on neighbouring pairs rebuilds the 128 bytes that start at
   (%rsi), which are written to (%rdi) with aligned stores.
   NOTE(review): movaps needs 16-byte-aligned addresses, so this
   presumably relies on the dispatch code (outside this chunk) entering
   only when %rsi - 14 and %rdi are 16-byte aligned -- confirm.  */
1318 .p2align 4
1319L(shl_14):
	/* The jae at the bottom tests the carry produced here; the movaps,
	   lea and palignr in between do not write the flags.  */
1320 sub $0x80, %rdx
1321 movaps -0x0e(%rsi), %xmm1
1322 movaps 0x02(%rsi), %xmm2
1323 movaps 0x12(%rsi), %xmm3
1324 movaps 0x22(%rsi), %xmm4
1325 movaps 0x32(%rsi), %xmm5
1326 movaps 0x42(%rsi), %xmm6
1327 movaps 0x52(%rsi), %xmm7
1328 movaps 0x62(%rsi), %xmm8
1329 movaps 0x72(%rsi), %xmm9
1330 lea 0x80(%rsi), %rsi
	/* palignr $14, lo, hi leaves bytes 14..29 of hi:lo in hi.  The
	   pairs are processed from the top down so that each register is
	   still unmodified when it serves as the low half.  */
1331 palignr $14, %xmm8, %xmm9
1332 movaps %xmm9, 0x70(%rdi)
1333 palignr $14, %xmm7, %xmm8
1334 movaps %xmm8, 0x60(%rdi)
1335 palignr $14, %xmm6, %xmm7
1336 movaps %xmm7, 0x50(%rdi)
1337 palignr $14, %xmm5, %xmm6
1338 movaps %xmm6, 0x40(%rdi)
1339 palignr $14, %xmm4, %xmm5
1340 movaps %xmm5, 0x30(%rdi)
1341 palignr $14, %xmm3, %xmm4
1342 movaps %xmm4, 0x20(%rdi)
1343 palignr $14, %xmm2, %xmm3
1344 movaps %xmm3, 0x10(%rdi)
1345 palignr $14, %xmm1, %xmm2
1346 movaps %xmm2, (%rdi)
1347 lea 0x80(%rdi), %rdi
1348 jae L(shl_14)
	/* Loop done.  %xmm0 and %r8 are set up before this chunk
	   (NOTE(review): presumably the 16 bytes saved from the start of
	   the source and the original destination -- confirm).  */
1349 movdqu %xmm0, (%r8)
	/* Add back the 0x80 taken by the final subtraction, advance both
	   pointers by the resulting count and dispatch on it.  The
	   fwd_write_N handlers address the tail with negative offsets.
	   The macro clobbers %r11 and %rdx.  */
1350 add $0x80, %rdx
1351 add %rdx, %rdi
1352 add %rdx, %rsi
1353 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1354
/* Backward copy loop, 128 bytes per iteration, working downwards from
   the bytes just below %rsi/%rdi.  Aligned loads start at -14(%rsi);
   each newly loaded vector is merged with the previous (higher) one by
   palignr $14 and the result is stored right away with an aligned
   store.
   NOTE(review): the movaps accesses presumably rely on %rsi - 14 and
   %rdi being 16-byte aligned on entry -- confirm at the dispatch
   site.  */
1355 .p2align 4
1356L(shl_14_bwd):
1357 movaps -0x0e(%rsi), %xmm1
1358
	/* palignr $14, lo, hi writes its result to hi, so the lower vector
	   stays intact to act as the high half of the next pair.  */
1359 movaps -0x1e(%rsi), %xmm2
1360 palignr $14, %xmm2, %xmm1
1361 movaps %xmm1, -0x10(%rdi)
1362
1363 movaps -0x2e(%rsi), %xmm3
1364 palignr $14, %xmm3, %xmm2
1365 movaps %xmm2, -0x20(%rdi)
1366
1367 movaps -0x3e(%rsi), %xmm4
1368 palignr $14, %xmm4, %xmm3
1369 movaps %xmm3, -0x30(%rdi)
1370
1371 movaps -0x4e(%rsi), %xmm5
1372 palignr $14, %xmm5, %xmm4
1373 movaps %xmm4, -0x40(%rdi)
1374
1375 movaps -0x5e(%rsi), %xmm6
1376 palignr $14, %xmm6, %xmm5
1377 movaps %xmm5, -0x50(%rdi)
1378
1379 movaps -0x6e(%rsi), %xmm7
1380 palignr $14, %xmm7, %xmm6
1381 movaps %xmm6, -0x60(%rdi)
1382
1383 movaps -0x7e(%rsi), %xmm8
1384 palignr $14, %xmm8, %xmm7
1385 movaps %xmm7, -0x70(%rdi)
1386
1387 movaps -0x8e(%rsi), %xmm9
1388 palignr $14, %xmm9, %xmm8
1389 movaps %xmm8, -0x80(%rdi)
1390
	/* The pointers are moved with lea so that the carry from this sub
	   is still intact when jae tests it.  */
1391 sub $0x80, %rdx
1392 lea -0x80(%rdi), %rdi
1393 lea -0x80(%rsi), %rsi
1394 jae L(shl_14_bwd)
	/* Loop done.  %xmm0 and %r8 are set up before this chunk
	   (NOTE(review): presumably 16 bytes saved from the end of the
	   source and the address they belong at -- confirm).  */
1395 movdqu %xmm0, (%r8)
	/* Add back the 0x80 taken by the final subtraction, move both
	   pointers down by the resulting count and dispatch on it.  The
	   bwd_write_N handlers address the tail with non-negative
	   offsets.  The macro clobbers %r11 and %rdx.  */
1396 add $0x80, %rdx
1397 sub %rdx, %rdi
1398 sub %rdx, %rsi
1399 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1400
/* Forward copy loop, 128 bytes per iteration, for a source that sits
   15 bytes past the aligned address it is read from.  Nine 16-byte
   vectors are read with aligned loads starting at -15(%rsi); palignr
   $15 on neighbouring pairs rebuilds the 128 bytes that start at
   (%rsi), which are written to (%rdi) with aligned stores.
   NOTE(review): movaps needs 16-byte-aligned addresses, so this
   presumably relies on the dispatch code (outside this chunk) entering
   only when %rsi - 15 and %rdi are 16-byte aligned -- confirm.  */
1401 .p2align 4
1402L(shl_15):
	/* The jae at the bottom tests the carry produced here; the movaps,
	   lea and palignr in between do not write the flags.  */
1403 sub $0x80, %rdx
1404 movaps -0x0f(%rsi), %xmm1
1405 movaps 0x01(%rsi), %xmm2
1406 movaps 0x11(%rsi), %xmm3
1407 movaps 0x21(%rsi), %xmm4
1408 movaps 0x31(%rsi), %xmm5
1409 movaps 0x41(%rsi), %xmm6
1410 movaps 0x51(%rsi), %xmm7
1411 movaps 0x61(%rsi), %xmm8
1412 movaps 0x71(%rsi), %xmm9
1413 lea 0x80(%rsi), %rsi
	/* palignr $15, lo, hi leaves bytes 15..30 of hi:lo in hi.  The
	   pairs are processed from the top down so that each register is
	   still unmodified when it serves as the low half.  */
1414 palignr $15, %xmm8, %xmm9
1415 movaps %xmm9, 0x70(%rdi)
1416 palignr $15, %xmm7, %xmm8
1417 movaps %xmm8, 0x60(%rdi)
1418 palignr $15, %xmm6, %xmm7
1419 movaps %xmm7, 0x50(%rdi)
1420 palignr $15, %xmm5, %xmm6
1421 movaps %xmm6, 0x40(%rdi)
1422 palignr $15, %xmm4, %xmm5
1423 movaps %xmm5, 0x30(%rdi)
1424 palignr $15, %xmm3, %xmm4
1425 movaps %xmm4, 0x20(%rdi)
1426 palignr $15, %xmm2, %xmm3
1427 movaps %xmm3, 0x10(%rdi)
1428 palignr $15, %xmm1, %xmm2
1429 movaps %xmm2, (%rdi)
1430 lea 0x80(%rdi), %rdi
1431 jae L(shl_15)
	/* Loop done.  %xmm0 and %r8 are set up before this chunk
	   (NOTE(review): presumably the 16 bytes saved from the start of
	   the source and the original destination -- confirm).  */
1432 movdqu %xmm0, (%r8)
	/* Add back the 0x80 taken by the final subtraction, advance both
	   pointers by the resulting count and dispatch on it.  The
	   fwd_write_N handlers address the tail with negative offsets.
	   The macro clobbers %r11 and %rdx.  */
1433 add $0x80, %rdx
1434 add %rdx, %rdi
1435 add %rdx, %rsi
1436 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1437
/* Backward copy loop, 128 bytes per iteration, working downwards from
   the bytes just below %rsi/%rdi.  Aligned loads start at -15(%rsi);
   each newly loaded vector is merged with the previous (higher) one by
   palignr $15 and the result is stored right away with an aligned
   store.
   NOTE(review): the movaps accesses presumably rely on %rsi - 15 and
   %rdi being 16-byte aligned on entry -- confirm at the dispatch
   site.  */
1438 .p2align 4
1439L(shl_15_bwd):
1440 movaps -0x0f(%rsi), %xmm1
1441
	/* palignr $15, lo, hi writes its result to hi, so the lower vector
	   stays intact to act as the high half of the next pair.  */
1442 movaps -0x1f(%rsi), %xmm2
1443 palignr $15, %xmm2, %xmm1
1444 movaps %xmm1, -0x10(%rdi)
1445
1446 movaps -0x2f(%rsi), %xmm3
1447 palignr $15, %xmm3, %xmm2
1448 movaps %xmm2, -0x20(%rdi)
1449
1450 movaps -0x3f(%rsi), %xmm4
1451 palignr $15, %xmm4, %xmm3
1452 movaps %xmm3, -0x30(%rdi)
1453
1454 movaps -0x4f(%rsi), %xmm5
1455 palignr $15, %xmm5, %xmm4
1456 movaps %xmm4, -0x40(%rdi)
1457
1458 movaps -0x5f(%rsi), %xmm6
1459 palignr $15, %xmm6, %xmm5
1460 movaps %xmm5, -0x50(%rdi)
1461
1462 movaps -0x6f(%rsi), %xmm7
1463 palignr $15, %xmm7, %xmm6
1464 movaps %xmm6, -0x60(%rdi)
1465
1466 movaps -0x7f(%rsi), %xmm8
1467 palignr $15, %xmm8, %xmm7
1468 movaps %xmm7, -0x70(%rdi)
1469
1470 movaps -0x8f(%rsi), %xmm9
1471 palignr $15, %xmm9, %xmm8
1472 movaps %xmm8, -0x80(%rdi)
1473
	/* The pointers are moved with lea so that the carry from this sub
	   is still intact when jae tests it.  */
1474 sub $0x80, %rdx
1475 lea -0x80(%rdi), %rdi
1476 lea -0x80(%rsi), %rsi
1477 jae L(shl_15_bwd)
	/* Loop done.  %xmm0 and %r8 are set up before this chunk
	   (NOTE(review): presumably 16 bytes saved from the end of the
	   source and the address they belong at -- confirm).  */
1478 movdqu %xmm0, (%r8)
	/* Add back the 0x80 taken by the final subtraction, move both
	   pointers down by the resulting count and dispatch on it.  The
	   bwd_write_N handlers address the tail with non-negative
	   offsets.  The macro clobbers %r11 and %rdx.  */
1479 add $0x80, %rdx
1480 sub %rdx, %rdi
1481 sub %rdx, %rsi
1482 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1483
/* Forward copy path that works in 128-byte iterations with unaligned
   loads and aligned stores.  Depending on how the remaining length in
   %rdx compares with the cache-size threshold in %rcx it uses
   non-temporal stores (movntdq), ordinary stores (movdqa), or the
   former followed by the latter.  The final sub-128-byte tail is
   dispatched through L(table_144_bytes_fwd).  */
1484 .p2align 4
1485L(gobble_mem_fwd):
	/* Read the next 16 source bytes before either store below; then
	   store %xmm0 at (%r8) (both set up before this chunk) and the
	   freshly read bytes at (%rdi) with an aligned store.  */
1486 movdqu (%rsi), %xmm1
1487 movdqu %xmm0, (%r8)
1488 movdqa %xmm1, (%rdi)
1489 sub $16, %rdx
1490 add $16, %rsi
1491 add $16, %rdi
1492
	/* %rcx = threshold: the build-time constant SHARED_CACHE_SIZE_HALF
	   if defined, otherwise the __x86_shared_cache_size_half
	   variable.  */
1493#ifdef SHARED_CACHE_SIZE_HALF
1494 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1495#else
1496 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1497#endif
1498#ifdef USE_AS_MEMMOVE
	/* memmove only: %r9 = %rsi - %rdi.  If that distance is at least
	   the length (unsigned compare), continue exactly as memcpy does.
	   Otherwise, if the distance does not exceed the threshold, skip
	   the non-temporal loop and go straight to the ordinary-store
	   loop.  */
1499 mov %rsi, %r9
1500 sub %rdi, %r9
1501 cmp %rdx, %r9
1502 jae L(memmove_is_memcpy_fwd)
1503 cmp %rcx, %r9
1504 jbe L(ll_cache_copy_fwd_start)
1505L(memmove_is_memcpy_fwd):
1506#endif
	/* %rcx = min (%rcx, %rdx); %rdx = what is left beyond that.  */
1507 cmp %rcx, %rdx
1508 ja L(bigger_in_fwd)
1509 mov %rdx, %rcx
1510L(bigger_in_fwd):
1511 sub %rcx, %rdx
	/* At most 0x1000 bytes beyond the threshold: copy everything with
	   ordinary stores (L(ll_cache_copy_fwd) adds %rcx back).  */
1512 cmp $0x1000, %rdx
1513 jbe L(ll_cache_copy_fwd)
1514
	/* If the excess is at most 8 * %rcx, copy in two steps: %rdx bytes
	   with non-temporal stores, then %rcx bytes with ordinary stores.
	   Otherwise fold %rcx back into %rdx and clear it, so everything
	   goes through the non-temporal loop.  */
1515 mov %rcx, %r9
1516 shl $3, %r9
1517 cmp %r9, %rdx
1518 jbe L(2steps_copy_fwd)
1519 add %rcx, %rdx
1520 xor %rcx, %rcx
1521L(2steps_copy_fwd):
	/* Bias %rdx by -0x80 so that the subtraction at the top of the
	   loop borrows on the iteration after which fewer than 0x80 bytes
	   remain.  */
1522 sub $0x80, %rdx
1523L(gobble_mem_fwd_loop):
	/* The carry from this sub is tested by the jae at the bottom;
	   nothing in between writes the flags.  */
1524 sub $0x80, %rdx
1525 prefetcht0 0x200(%rsi)
1526 prefetcht0 0x300(%rsi)
1527 movdqu (%rsi), %xmm0
1528 movdqu 0x10(%rsi), %xmm1
1529 movdqu 0x20(%rsi), %xmm2
1530 movdqu 0x30(%rsi), %xmm3
1531 movdqu 0x40(%rsi), %xmm4
1532 movdqu 0x50(%rsi), %xmm5
1533 movdqu 0x60(%rsi), %xmm6
1534 movdqu 0x70(%rsi), %xmm7
	/* All eight loads are fenced before the non-temporal stores
	   begin.  */
1535 lfence
1536 movntdq %xmm0, (%rdi)
1537 movntdq %xmm1, 0x10(%rdi)
1538 movntdq %xmm2, 0x20(%rdi)
1539 movntdq %xmm3, 0x30(%rdi)
1540 movntdq %xmm4, 0x40(%rdi)
1541 movntdq %xmm5, 0x50(%rdi)
1542 movntdq %xmm6, 0x60(%rdi)
1543 movntdq %xmm7, 0x70(%rdi)
1544 lea 0x80(%rsi), %rsi
1545 lea 0x80(%rdi), %rdi
1546 jae L(gobble_mem_fwd_loop)
	/* Order the weakly-ordered non-temporal stores ahead of the
	   stores that follow.  */
1547 sfence
	/* Fewer than 0x80 bytes set aside for ordinary stores (this
	   includes %rcx == 0): go straight to the tail.  Otherwise undo
	   the bias and add the set-aside bytes to the count.  */
1548 cmp $0x80, %rcx
1549 jb L(gobble_mem_fwd_end)
1550 add $0x80, %rdx
1551L(ll_cache_copy_fwd):
1552 add %rcx, %rdx
1553L(ll_cache_copy_fwd_start):
	/* Same -0x80 bias as for the non-temporal loop.  */
1554 sub $0x80, %rdx
1555L(gobble_ll_loop_fwd):
	/* Prefetch ahead in both the source and the destination.  */
1556 prefetchnta 0x1c0(%rsi)
1557 prefetchnta 0x280(%rsi)
1558 prefetchnta 0x1c0(%rdi)
1559 prefetchnta 0x280(%rdi)
1560 sub $0x80, %rdx
	/* All eight loads are done before the first store.  */
1561 movdqu (%rsi), %xmm0
1562 movdqu 0x10(%rsi), %xmm1
1563 movdqu 0x20(%rsi), %xmm2
1564 movdqu 0x30(%rsi), %xmm3
1565 movdqu 0x40(%rsi), %xmm4
1566 movdqu 0x50(%rsi), %xmm5
1567 movdqu 0x60(%rsi), %xmm6
1568 movdqu 0x70(%rsi), %xmm7
1569 movdqa %xmm0, (%rdi)
1570 movdqa %xmm1, 0x10(%rdi)
1571 movdqa %xmm2, 0x20(%rdi)
1572 movdqa %xmm3, 0x30(%rdi)
1573 movdqa %xmm4, 0x40(%rdi)
1574 movdqa %xmm5, 0x50(%rdi)
1575 movdqa %xmm6, 0x60(%rdi)
1576 movdqa %xmm7, 0x70(%rdi)
1577 lea 0x80(%rsi), %rsi
1578 lea 0x80(%rdi), %rdi
1579 jae L(gobble_ll_loop_fwd)
1580L(gobble_mem_fwd_end):
	/* Remove the bias, advance both pointers by the resulting count
	   and dispatch on it.  The fwd_write_N handlers address the tail
	   with negative offsets.  The macro clobbers %r11 and %rdx.  */
1581 add $0x80, %rdx
1582 add %rdx, %rsi
1583 add %rdx, %rdi
1584 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1585
/* Backward copy path that works in 128-byte iterations with unaligned
   loads and aligned stores, running from the end of the buffers towards
   the start.  Depending on how the remaining length in %rdx compares
   with the cache-size threshold in %rcx it uses non-temporal stores
   (movntdq), ordinary stores (movdqa), or the former followed by the
   latter.  The final sub-128-byte tail is dispatched through
   L(table_144_bytes_bwd).  */
1586 .p2align 4
1587L(gobble_mem_bwd):
	/* Move both pointers %rdx bytes up, i.e. to the end of the data.  */
1588 add %rdx, %rsi
1589 add %rdx, %rdi
1590
	/* Keep the last 16 source bytes in %xmm0 and their destination in
	   %r8; they are written at L(gobble_mem_bwd_end).  Then round %rdi
	   down to a 16-byte boundary and lower %rsi and %rdx by the same
	   0..15 bytes (%r9); those skipped bytes lie inside the 16 bytes
	   held in %xmm0.  */
1591 movdqu -16(%rsi), %xmm0
1592 lea -16(%rdi), %r8
1593 mov %rdi, %r9
1594 and $-16, %rdi
1595 sub %rdi, %r9
1596 sub %r9, %rsi
1597 sub %r9, %rdx
1598
1599
	/* %rcx = threshold: the build-time constant SHARED_CACHE_SIZE_HALF
	   if defined, otherwise the __x86_shared_cache_size_half
	   variable.  */
1600#ifdef SHARED_CACHE_SIZE_HALF
1601 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1602#else
1603 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1604#endif
1605#ifdef USE_AS_MEMMOVE
	/* memmove only: %r9 = %rdi - %rsi.  If that distance is at least
	   the length (unsigned compare), continue exactly as memcpy does.
	   Otherwise, if the distance does not exceed the threshold, skip
	   the non-temporal loop and go straight to the ordinary-store
	   loop.  */
1606 mov %rdi, %r9
1607 sub %rsi, %r9
1608 cmp %rdx, %r9
1609 jae L(memmove_is_memcpy_bwd)
1610 cmp %rcx, %r9
1611 jbe L(ll_cache_copy_bwd_start)
1612L(memmove_is_memcpy_bwd):
1613#endif
	/* %rcx = min (%rcx, %rdx); %rdx = what is left beyond that.  */
1614 cmp %rcx, %rdx
1615 ja L(bigger)
1616 mov %rdx, %rcx
1617L(bigger):
1618 sub %rcx, %rdx
	/* At most 0x1000 bytes beyond the threshold: copy everything with
	   ordinary stores (L(ll_cache_copy) adds %rcx back).  */
1619 cmp $0x1000, %rdx
1620 jbe L(ll_cache_copy)
1621
	/* If the excess is at most 8 * %rcx, copy in two steps: %rdx bytes
	   with non-temporal stores, then %rcx bytes with ordinary stores.
	   Otherwise fold %rcx back into %rdx and clear it, so everything
	   goes through the non-temporal loop.  */
1622 mov %rcx, %r9
1623 shl $3, %r9
1624 cmp %r9, %rdx
1625 jbe L(2steps_copy)
1626 add %rcx, %rdx
1627 xor %rcx, %rcx
1628L(2steps_copy):
	/* Bias %rdx by -0x80 so that the subtraction at the top of the
	   loop borrows on the iteration after which fewer than 0x80 bytes
	   remain.  */
1629 sub $0x80, %rdx
1630L(gobble_mem_bwd_loop):
	/* The carry from this sub is tested by the jae at the bottom;
	   nothing in between writes the flags.  */
1631 sub $0x80, %rdx
1632 prefetcht0 -0x200(%rsi)
1633 prefetcht0 -0x300(%rsi)
1634 movdqu -0x10(%rsi), %xmm1
1635 movdqu -0x20(%rsi), %xmm2
1636 movdqu -0x30(%rsi), %xmm3
1637 movdqu -0x40(%rsi), %xmm4
1638 movdqu -0x50(%rsi), %xmm5
1639 movdqu -0x60(%rsi), %xmm6
1640 movdqu -0x70(%rsi), %xmm7
1641 movdqu -0x80(%rsi), %xmm8
	/* All eight loads are fenced before the non-temporal stores
	   begin.  %xmm0 is not used here; it still holds the saved last 16
	   bytes.  */
1642 lfence
1643 movntdq %xmm1, -0x10(%rdi)
1644 movntdq %xmm2, -0x20(%rdi)
1645 movntdq %xmm3, -0x30(%rdi)
1646 movntdq %xmm4, -0x40(%rdi)
1647 movntdq %xmm5, -0x50(%rdi)
1648 movntdq %xmm6, -0x60(%rdi)
1649 movntdq %xmm7, -0x70(%rdi)
1650 movntdq %xmm8, -0x80(%rdi)
1651 lea -0x80(%rsi), %rsi
1652 lea -0x80(%rdi), %rdi
1653 jae L(gobble_mem_bwd_loop)
	/* Order the weakly-ordered non-temporal stores ahead of the
	   stores that follow.  */
1654 sfence
	/* Fewer than 0x80 bytes set aside for ordinary stores (this
	   includes %rcx == 0): go straight to the tail.  Otherwise undo
	   the bias and add the set-aside bytes to the count.  */
1655 cmp $0x80, %rcx
1656 jb L(gobble_mem_bwd_end)
1657 add $0x80, %rdx
1658L(ll_cache_copy):
1659 add %rcx, %rdx
1660L(ll_cache_copy_bwd_start):
	/* Same -0x80 bias as for the non-temporal loop.  */
1661 sub $0x80, %rdx
1662L(gobble_ll_loop):
	/* Prefetch ahead (downwards) in both the source and the
	   destination.  */
1663 prefetchnta -0x1c0(%rsi)
1664 prefetchnta -0x280(%rsi)
1665 prefetchnta -0x1c0(%rdi)
1666 prefetchnta -0x280(%rdi)
1667 sub $0x80, %rdx
	/* All eight loads are done before the first store.  */
1668 movdqu -0x10(%rsi), %xmm1
1669 movdqu -0x20(%rsi), %xmm2
1670 movdqu -0x30(%rsi), %xmm3
1671 movdqu -0x40(%rsi), %xmm4
1672 movdqu -0x50(%rsi), %xmm5
1673 movdqu -0x60(%rsi), %xmm6
1674 movdqu -0x70(%rsi), %xmm7
1675 movdqu -0x80(%rsi), %xmm8
1676 movdqa %xmm1, -0x10(%rdi)
1677 movdqa %xmm2, -0x20(%rdi)
1678 movdqa %xmm3, -0x30(%rdi)
1679 movdqa %xmm4, -0x40(%rdi)
1680 movdqa %xmm5, -0x50(%rdi)
1681 movdqa %xmm6, -0x60(%rdi)
1682 movdqa %xmm7, -0x70(%rdi)
1683 movdqa %xmm8, -0x80(%rdi)
1684 lea -0x80(%rsi), %rsi
1685 lea -0x80(%rdi), %rdi
1686 jae L(gobble_ll_loop)
1687L(gobble_mem_bwd_end):
	/* Write the last 16 bytes saved at entry.  */
1688 movdqu %xmm0, (%r8)
	/* Remove the bias, move both pointers down by the resulting count
	   and dispatch on it.  The bwd_write_N handlers address the tail
	   with non-negative offsets.  The macro clobbers %r11 and %rdx.  */
1689 add $0x80, %rdx
1690 sub %rdx, %rsi
1691 sub %rdx, %rdi
1692 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1693
/* Forward tail handlers for 128, 112, ..., 16 and 0 bytes (presumably
   reached through L(table_144_bytes_fwd), which is outside this
   chunk).  %rsi and %rdi point just past the end of the data, so the
   handler for N bytes starts at offset -N.  Each label copies one
   16-byte chunk through %xmm0 and falls through to the label for 16
   bytes fewer; L(fwd_write_0bytes) just returns.  */
1694 .p2align 4
1695L(fwd_write_128bytes):
1696 lddqu -128(%rsi), %xmm0
1697 movdqu %xmm0, -128(%rdi)
1698L(fwd_write_112bytes):
1699 lddqu -112(%rsi), %xmm0
1700 movdqu %xmm0, -112(%rdi)
1701L(fwd_write_96bytes):
1702 lddqu -96(%rsi), %xmm0
1703 movdqu %xmm0, -96(%rdi)
1704L(fwd_write_80bytes):
1705 lddqu -80(%rsi), %xmm0
1706 movdqu %xmm0, -80(%rdi)
1707L(fwd_write_64bytes):
1708 lddqu -64(%rsi), %xmm0
1709 movdqu %xmm0, -64(%rdi)
1710L(fwd_write_48bytes):
1711 lddqu -48(%rsi), %xmm0
1712 movdqu %xmm0, -48(%rdi)
1713L(fwd_write_32bytes):
1714 lddqu -32(%rsi), %xmm0
1715 movdqu %xmm0, -32(%rdi)
1716L(fwd_write_16bytes):
1717 lddqu -16(%rsi), %xmm0
1718 movdqu %xmm0, -16(%rdi)
1719L(fwd_write_0bytes):
1720 ret
1721
1722
/* Forward tail handlers for 143, 127, ..., 47 and 31 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 47 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_31bytes)
   finishes with two 16-byte chunks at -31 and -16, which overlap by 1
   byte; both are loaded before either is stored.  */
1723 .p2align 4
1724L(fwd_write_143bytes):
1725 lddqu -143(%rsi), %xmm0
1726 movdqu %xmm0, -143(%rdi)
1727L(fwd_write_127bytes):
1728 lddqu -127(%rsi), %xmm0
1729 movdqu %xmm0, -127(%rdi)
1730L(fwd_write_111bytes):
1731 lddqu -111(%rsi), %xmm0
1732 movdqu %xmm0, -111(%rdi)
1733L(fwd_write_95bytes):
1734 lddqu -95(%rsi), %xmm0
1735 movdqu %xmm0, -95(%rdi)
1736L(fwd_write_79bytes):
1737 lddqu -79(%rsi), %xmm0
1738 movdqu %xmm0, -79(%rdi)
1739L(fwd_write_63bytes):
1740 lddqu -63(%rsi), %xmm0
1741 movdqu %xmm0, -63(%rdi)
1742L(fwd_write_47bytes):
1743 lddqu -47(%rsi), %xmm0
1744 movdqu %xmm0, -47(%rdi)
1745L(fwd_write_31bytes):
1746 lddqu -31(%rsi), %xmm0
1747 lddqu -16(%rsi), %xmm1
1748 movdqu %xmm0, -31(%rdi)
1749 movdqu %xmm1, -16(%rdi)
1750 ret
1751
/* Forward tail handler for 15 bytes ending at %rsi/%rdi: two 8-byte
   moves at -15 and -8, overlapping by 1 byte.  Both loads are done
   before the stores.  Uses %rdx and %rcx as scratch.  */
1752 .p2align 4
1753L(fwd_write_15bytes):
1754 mov -15(%rsi), %rdx
1755 mov -8(%rsi), %rcx
1756 mov %rdx, -15(%rdi)
1757 mov %rcx, -8(%rdi)
1758 ret
1759
/* Forward tail handlers for 142, 126, ..., 46 and 30 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 46 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_30bytes)
   finishes with two 16-byte chunks at -30 and -16, which overlap by 2
   bytes; both are loaded before either is stored.  */
1760 .p2align 4
1761L(fwd_write_142bytes):
1762 lddqu -142(%rsi), %xmm0
1763 movdqu %xmm0, -142(%rdi)
1764L(fwd_write_126bytes):
1765 lddqu -126(%rsi), %xmm0
1766 movdqu %xmm0, -126(%rdi)
1767L(fwd_write_110bytes):
1768 lddqu -110(%rsi), %xmm0
1769 movdqu %xmm0, -110(%rdi)
1770L(fwd_write_94bytes):
1771 lddqu -94(%rsi), %xmm0
1772 movdqu %xmm0, -94(%rdi)
1773L(fwd_write_78bytes):
1774 lddqu -78(%rsi), %xmm0
1775 movdqu %xmm0, -78(%rdi)
1776L(fwd_write_62bytes):
1777 lddqu -62(%rsi), %xmm0
1778 movdqu %xmm0, -62(%rdi)
1779L(fwd_write_46bytes):
1780 lddqu -46(%rsi), %xmm0
1781 movdqu %xmm0, -46(%rdi)
1782L(fwd_write_30bytes):
1783 lddqu -30(%rsi), %xmm0
1784 lddqu -16(%rsi), %xmm1
1785 movdqu %xmm0, -30(%rdi)
1786 movdqu %xmm1, -16(%rdi)
1787 ret
1788
/* Forward tail handler for 14 bytes ending at %rsi/%rdi: two 8-byte
   moves at -14 and -8, overlapping by 2 bytes.  Both loads are done
   before the stores.  Uses %rdx and %rcx as scratch.  */
1789 .p2align 4
1790L(fwd_write_14bytes):
1791 mov -14(%rsi), %rdx
1792 mov -8(%rsi), %rcx
1793 mov %rdx, -14(%rdi)
1794 mov %rcx, -8(%rdi)
1795 ret
1796
/* Forward tail handlers for 141, 125, ..., 45 and 29 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 45 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_29bytes)
   finishes with two 16-byte chunks at -29 and -16, which overlap by 3
   bytes; both are loaded before either is stored.  */
1797 .p2align 4
1798L(fwd_write_141bytes):
1799 lddqu -141(%rsi), %xmm0
1800 movdqu %xmm0, -141(%rdi)
1801L(fwd_write_125bytes):
1802 lddqu -125(%rsi), %xmm0
1803 movdqu %xmm0, -125(%rdi)
1804L(fwd_write_109bytes):
1805 lddqu -109(%rsi), %xmm0
1806 movdqu %xmm0, -109(%rdi)
1807L(fwd_write_93bytes):
1808 lddqu -93(%rsi), %xmm0
1809 movdqu %xmm0, -93(%rdi)
1810L(fwd_write_77bytes):
1811 lddqu -77(%rsi), %xmm0
1812 movdqu %xmm0, -77(%rdi)
1813L(fwd_write_61bytes):
1814 lddqu -61(%rsi), %xmm0
1815 movdqu %xmm0, -61(%rdi)
1816L(fwd_write_45bytes):
1817 lddqu -45(%rsi), %xmm0
1818 movdqu %xmm0, -45(%rdi)
1819L(fwd_write_29bytes):
1820 lddqu -29(%rsi), %xmm0
1821 lddqu -16(%rsi), %xmm1
1822 movdqu %xmm0, -29(%rdi)
1823 movdqu %xmm1, -16(%rdi)
1824 ret
1825
/* Forward tail handler for 13 bytes ending at %rsi/%rdi: two 8-byte
   moves at -13 and -8, overlapping by 3 bytes.  Both loads are done
   before the stores.  Uses %rdx and %rcx as scratch.  */
1826 .p2align 4
1827L(fwd_write_13bytes):
1828 mov -13(%rsi), %rdx
1829 mov -8(%rsi), %rcx
1830 mov %rdx, -13(%rdi)
1831 mov %rcx, -8(%rdi)
1832 ret
1833
/* Forward tail handlers for 140, 124, ..., 44 and 28 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 44 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_28bytes)
   finishes with two 16-byte chunks at -28 and -16, which overlap by 4
   bytes; both are loaded before either is stored.  */
1834 .p2align 4
1835L(fwd_write_140bytes):
1836 lddqu -140(%rsi), %xmm0
1837 movdqu %xmm0, -140(%rdi)
1838L(fwd_write_124bytes):
1839 lddqu -124(%rsi), %xmm0
1840 movdqu %xmm0, -124(%rdi)
1841L(fwd_write_108bytes):
1842 lddqu -108(%rsi), %xmm0
1843 movdqu %xmm0, -108(%rdi)
1844L(fwd_write_92bytes):
1845 lddqu -92(%rsi), %xmm0
1846 movdqu %xmm0, -92(%rdi)
1847L(fwd_write_76bytes):
1848 lddqu -76(%rsi), %xmm0
1849 movdqu %xmm0, -76(%rdi)
1850L(fwd_write_60bytes):
1851 lddqu -60(%rsi), %xmm0
1852 movdqu %xmm0, -60(%rdi)
1853L(fwd_write_44bytes):
1854 lddqu -44(%rsi), %xmm0
1855 movdqu %xmm0, -44(%rdi)
1856L(fwd_write_28bytes):
1857 lddqu -28(%rsi), %xmm0
1858 lddqu -16(%rsi), %xmm1
1859 movdqu %xmm0, -28(%rdi)
1860 movdqu %xmm1, -16(%rdi)
1861 ret
1862
/* Forward tail handler for 12 bytes ending at %rsi/%rdi: an 8-byte
   move at -12 followed by a 4-byte move at -4 (no overlap).  Both
   loads are done before the stores.  Uses %rdx and %rcx as scratch.  */
1863 .p2align 4
1864L(fwd_write_12bytes):
1865 mov -12(%rsi), %rdx
1866 mov -4(%rsi), %ecx
1867 mov %rdx, -12(%rdi)
1868 mov %ecx, -4(%rdi)
1869 ret
1870
/* Forward tail handlers for 139, 123, ..., 43 and 27 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 43 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_27bytes)
   finishes with two 16-byte chunks at -27 and -16, which overlap by 5
   bytes; both are loaded before either is stored.  */
1871 .p2align 4
1872L(fwd_write_139bytes):
1873 lddqu -139(%rsi), %xmm0
1874 movdqu %xmm0, -139(%rdi)
1875L(fwd_write_123bytes):
1876 lddqu -123(%rsi), %xmm0
1877 movdqu %xmm0, -123(%rdi)
1878L(fwd_write_107bytes):
1879 lddqu -107(%rsi), %xmm0
1880 movdqu %xmm0, -107(%rdi)
1881L(fwd_write_91bytes):
1882 lddqu -91(%rsi), %xmm0
1883 movdqu %xmm0, -91(%rdi)
1884L(fwd_write_75bytes):
1885 lddqu -75(%rsi), %xmm0
1886 movdqu %xmm0, -75(%rdi)
1887L(fwd_write_59bytes):
1888 lddqu -59(%rsi), %xmm0
1889 movdqu %xmm0, -59(%rdi)
1890L(fwd_write_43bytes):
1891 lddqu -43(%rsi), %xmm0
1892 movdqu %xmm0, -43(%rdi)
1893L(fwd_write_27bytes):
1894 lddqu -27(%rsi), %xmm0
1895 lddqu -16(%rsi), %xmm1
1896 movdqu %xmm0, -27(%rdi)
1897 movdqu %xmm1, -16(%rdi)
1898 ret
1899
/* Forward tail handler for 11 bytes ending at %rsi/%rdi: an 8-byte
   move at -11 and a 4-byte move at -4, overlapping by 1 byte.  Both
   loads are done before the stores.  Uses %rdx and %rcx as scratch.  */
1900 .p2align 4
1901L(fwd_write_11bytes):
1902 mov -11(%rsi), %rdx
1903 mov -4(%rsi), %ecx
1904 mov %rdx, -11(%rdi)
1905 mov %ecx, -4(%rdi)
1906 ret
1907
/* Forward tail handlers for 138, 122, ..., 42 and 26 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 42 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_26bytes)
   finishes with two 16-byte chunks at -26 and -16, which overlap by 6
   bytes; both are loaded before either is stored.  */
1908 .p2align 4
1909L(fwd_write_138bytes):
1910 lddqu -138(%rsi), %xmm0
1911 movdqu %xmm0, -138(%rdi)
1912L(fwd_write_122bytes):
1913 lddqu -122(%rsi), %xmm0
1914 movdqu %xmm0, -122(%rdi)
1915L(fwd_write_106bytes):
1916 lddqu -106(%rsi), %xmm0
1917 movdqu %xmm0, -106(%rdi)
1918L(fwd_write_90bytes):
1919 lddqu -90(%rsi), %xmm0
1920 movdqu %xmm0, -90(%rdi)
1921L(fwd_write_74bytes):
1922 lddqu -74(%rsi), %xmm0
1923 movdqu %xmm0, -74(%rdi)
1924L(fwd_write_58bytes):
1925 lddqu -58(%rsi), %xmm0
1926 movdqu %xmm0, -58(%rdi)
1927L(fwd_write_42bytes):
1928 lddqu -42(%rsi), %xmm0
1929 movdqu %xmm0, -42(%rdi)
1930L(fwd_write_26bytes):
1931 lddqu -26(%rsi), %xmm0
1932 lddqu -16(%rsi), %xmm1
1933 movdqu %xmm0, -26(%rdi)
1934 movdqu %xmm1, -16(%rdi)
1935 ret
1936
/* Forward tail handler for 10 bytes ending at %rsi/%rdi: an 8-byte
   move at -10 and a 4-byte move at -4, overlapping by 2 bytes.  Both
   loads are done before the stores.  Uses %rdx and %rcx as scratch.  */
1937 .p2align 4
1938L(fwd_write_10bytes):
1939 mov -10(%rsi), %rdx
1940 mov -4(%rsi), %ecx
1941 mov %rdx, -10(%rdi)
1942 mov %ecx, -4(%rdi)
1943 ret
1944
/* Forward tail handlers for 137, 121, ..., 41 and 25 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 41 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_25bytes)
   finishes with two 16-byte chunks at -25 and -16, which overlap by 7
   bytes; both are loaded before either is stored.  */
1945 .p2align 4
1946L(fwd_write_137bytes):
1947 lddqu -137(%rsi), %xmm0
1948 movdqu %xmm0, -137(%rdi)
1949L(fwd_write_121bytes):
1950 lddqu -121(%rsi), %xmm0
1951 movdqu %xmm0, -121(%rdi)
1952L(fwd_write_105bytes):
1953 lddqu -105(%rsi), %xmm0
1954 movdqu %xmm0, -105(%rdi)
1955L(fwd_write_89bytes):
1956 lddqu -89(%rsi), %xmm0
1957 movdqu %xmm0, -89(%rdi)
1958L(fwd_write_73bytes):
1959 lddqu -73(%rsi), %xmm0
1960 movdqu %xmm0, -73(%rdi)
1961L(fwd_write_57bytes):
1962 lddqu -57(%rsi), %xmm0
1963 movdqu %xmm0, -57(%rdi)
1964L(fwd_write_41bytes):
1965 lddqu -41(%rsi), %xmm0
1966 movdqu %xmm0, -41(%rdi)
1967L(fwd_write_25bytes):
1968 lddqu -25(%rsi), %xmm0
1969 lddqu -16(%rsi), %xmm1
1970 movdqu %xmm0, -25(%rdi)
1971 movdqu %xmm1, -16(%rdi)
1972 ret
1973
/* Forward tail handler for 9 bytes ending at %rsi/%rdi: an 8-byte
   move at -9 and a 4-byte move at -4, overlapping by 3 bytes.  Both
   loads are done before the stores.  Uses %rdx and %rcx as scratch.  */
1974 .p2align 4
1975L(fwd_write_9bytes):
1976 mov -9(%rsi), %rdx
1977 mov -4(%rsi), %ecx
1978 mov %rdx, -9(%rdi)
1979 mov %ecx, -4(%rdi)
1980 ret
1981
/* Forward tail handlers for 136, 120, ..., 40 and 24 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 40 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_24bytes)
   finishes with two 16-byte chunks at -24 and -16, which overlap by 8
   bytes; both are loaded before either is stored.  */
1982 .p2align 4
1983L(fwd_write_136bytes):
1984 lddqu -136(%rsi), %xmm0
1985 movdqu %xmm0, -136(%rdi)
1986L(fwd_write_120bytes):
1987 lddqu -120(%rsi), %xmm0
1988 movdqu %xmm0, -120(%rdi)
1989L(fwd_write_104bytes):
1990 lddqu -104(%rsi), %xmm0
1991 movdqu %xmm0, -104(%rdi)
1992L(fwd_write_88bytes):
1993 lddqu -88(%rsi), %xmm0
1994 movdqu %xmm0, -88(%rdi)
1995L(fwd_write_72bytes):
1996 lddqu -72(%rsi), %xmm0
1997 movdqu %xmm0, -72(%rdi)
1998L(fwd_write_56bytes):
1999 lddqu -56(%rsi), %xmm0
2000 movdqu %xmm0, -56(%rdi)
2001L(fwd_write_40bytes):
2002 lddqu -40(%rsi), %xmm0
2003 movdqu %xmm0, -40(%rdi)
2004L(fwd_write_24bytes):
2005 lddqu -24(%rsi), %xmm0
2006 lddqu -16(%rsi), %xmm1
2007 movdqu %xmm0, -24(%rdi)
2008 movdqu %xmm1, -16(%rdi)
2009 ret
2010
/* Forward tail handler for 8 bytes ending at %rsi/%rdi: a single
   8-byte move through %rdx.  */
2011 .p2align 4
2012L(fwd_write_8bytes):
2013 mov -8(%rsi), %rdx
2014 mov %rdx, -8(%rdi)
2015 ret
2016
/* Forward tail handlers for 135, 119, ..., 39 and 23 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 39 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_23bytes)
   finishes with two 16-byte chunks at -23 and -16, which overlap by 9
   bytes; both are loaded before either is stored.  */
2017 .p2align 4
2018L(fwd_write_135bytes):
2019 lddqu -135(%rsi), %xmm0
2020 movdqu %xmm0, -135(%rdi)
2021L(fwd_write_119bytes):
2022 lddqu -119(%rsi), %xmm0
2023 movdqu %xmm0, -119(%rdi)
2024L(fwd_write_103bytes):
2025 lddqu -103(%rsi), %xmm0
2026 movdqu %xmm0, -103(%rdi)
2027L(fwd_write_87bytes):
2028 lddqu -87(%rsi), %xmm0
2029 movdqu %xmm0, -87(%rdi)
2030L(fwd_write_71bytes):
2031 lddqu -71(%rsi), %xmm0
2032 movdqu %xmm0, -71(%rdi)
2033L(fwd_write_55bytes):
2034 lddqu -55(%rsi), %xmm0
2035 movdqu %xmm0, -55(%rdi)
2036L(fwd_write_39bytes):
2037 lddqu -39(%rsi), %xmm0
2038 movdqu %xmm0, -39(%rdi)
2039L(fwd_write_23bytes):
2040 lddqu -23(%rsi), %xmm0
2041 lddqu -16(%rsi), %xmm1
2042 movdqu %xmm0, -23(%rdi)
2043 movdqu %xmm1, -16(%rdi)
2044 ret
2045
/* Forward tail handler for 7 bytes ending at %rsi/%rdi: two 4-byte
   moves at -7 and -4, overlapping by 1 byte.  Both loads are done
   before the stores.  Uses %edx and %ecx as scratch.  */
2046 .p2align 4
2047L(fwd_write_7bytes):
2048 mov -7(%rsi), %edx
2049 mov -4(%rsi), %ecx
2050 mov %edx, -7(%rdi)
2051 mov %ecx, -4(%rdi)
2052 ret
2053
/* Forward tail handlers for 134, 118, ..., 38 and 22 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 38 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_22bytes)
   finishes with two 16-byte chunks at -22 and -16, which overlap by 10
   bytes; both are loaded before either is stored.  */
2054 .p2align 4
2055L(fwd_write_134bytes):
2056 lddqu -134(%rsi), %xmm0
2057 movdqu %xmm0, -134(%rdi)
2058L(fwd_write_118bytes):
2059 lddqu -118(%rsi), %xmm0
2060 movdqu %xmm0, -118(%rdi)
2061L(fwd_write_102bytes):
2062 lddqu -102(%rsi), %xmm0
2063 movdqu %xmm0, -102(%rdi)
2064L(fwd_write_86bytes):
2065 lddqu -86(%rsi), %xmm0
2066 movdqu %xmm0, -86(%rdi)
2067L(fwd_write_70bytes):
2068 lddqu -70(%rsi), %xmm0
2069 movdqu %xmm0, -70(%rdi)
2070L(fwd_write_54bytes):
2071 lddqu -54(%rsi), %xmm0
2072 movdqu %xmm0, -54(%rdi)
2073L(fwd_write_38bytes):
2074 lddqu -38(%rsi), %xmm0
2075 movdqu %xmm0, -38(%rdi)
2076L(fwd_write_22bytes):
2077 lddqu -22(%rsi), %xmm0
2078 lddqu -16(%rsi), %xmm1
2079 movdqu %xmm0, -22(%rdi)
2080 movdqu %xmm1, -16(%rdi)
2081 ret
2082
/* Forward tail handler for 6 bytes ending at %rsi/%rdi: two 4-byte
   moves at -6 and -4, overlapping by 2 bytes.  Both loads are done
   before the stores.  Uses %edx and %ecx as scratch.  */
2083 .p2align 4
2084L(fwd_write_6bytes):
2085 mov -6(%rsi), %edx
2086 mov -4(%rsi), %ecx
2087 mov %edx, -6(%rdi)
2088 mov %ecx, -4(%rdi)
2089 ret
2090
/* Forward tail handlers for 133, 117, ..., 37 and 21 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 37 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_21bytes)
   finishes with two 16-byte chunks at -21 and -16, which overlap by 11
   bytes; both are loaded before either is stored.  */
2091 .p2align 4
2092L(fwd_write_133bytes):
2093 lddqu -133(%rsi), %xmm0
2094 movdqu %xmm0, -133(%rdi)
2095L(fwd_write_117bytes):
2096 lddqu -117(%rsi), %xmm0
2097 movdqu %xmm0, -117(%rdi)
2098L(fwd_write_101bytes):
2099 lddqu -101(%rsi), %xmm0
2100 movdqu %xmm0, -101(%rdi)
2101L(fwd_write_85bytes):
2102 lddqu -85(%rsi), %xmm0
2103 movdqu %xmm0, -85(%rdi)
2104L(fwd_write_69bytes):
2105 lddqu -69(%rsi), %xmm0
2106 movdqu %xmm0, -69(%rdi)
2107L(fwd_write_53bytes):
2108 lddqu -53(%rsi), %xmm0
2109 movdqu %xmm0, -53(%rdi)
2110L(fwd_write_37bytes):
2111 lddqu -37(%rsi), %xmm0
2112 movdqu %xmm0, -37(%rdi)
2113L(fwd_write_21bytes):
2114 lddqu -21(%rsi), %xmm0
2115 lddqu -16(%rsi), %xmm1
2116 movdqu %xmm0, -21(%rdi)
2117 movdqu %xmm1, -16(%rdi)
2118 ret
2119
/* Forward tail handler for 5 bytes ending at %rsi/%rdi: two 4-byte
   moves at -5 and -4, overlapping by 3 bytes.  Both loads are done
   before the stores.  Uses %edx and %ecx as scratch.  */
2120 .p2align 4
2121L(fwd_write_5bytes):
2122 mov -5(%rsi), %edx
2123 mov -4(%rsi), %ecx
2124 mov %edx, -5(%rdi)
2125 mov %ecx, -4(%rdi)
2126 ret
2127
/* Forward tail handlers for 132, 116, ..., 36 and 20 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 36 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_20bytes)
   finishes with two 16-byte chunks at -20 and -16, which overlap by 12
   bytes; both are loaded before either is stored.  */
2128 .p2align 4
2129L(fwd_write_132bytes):
2130 lddqu -132(%rsi), %xmm0
2131 movdqu %xmm0, -132(%rdi)
2132L(fwd_write_116bytes):
2133 lddqu -116(%rsi), %xmm0
2134 movdqu %xmm0, -116(%rdi)
2135L(fwd_write_100bytes):
2136 lddqu -100(%rsi), %xmm0
2137 movdqu %xmm0, -100(%rdi)
2138L(fwd_write_84bytes):
2139 lddqu -84(%rsi), %xmm0
2140 movdqu %xmm0, -84(%rdi)
2141L(fwd_write_68bytes):
2142 lddqu -68(%rsi), %xmm0
2143 movdqu %xmm0, -68(%rdi)
2144L(fwd_write_52bytes):
2145 lddqu -52(%rsi), %xmm0
2146 movdqu %xmm0, -52(%rdi)
2147L(fwd_write_36bytes):
2148 lddqu -36(%rsi), %xmm0
2149 movdqu %xmm0, -36(%rdi)
2150L(fwd_write_20bytes):
2151 lddqu -20(%rsi), %xmm0
2152 lddqu -16(%rsi), %xmm1
2153 movdqu %xmm0, -20(%rdi)
2154 movdqu %xmm1, -16(%rdi)
2155 ret
2156
/* Forward tail handler for 4 bytes ending at %rsi/%rdi: a single
   4-byte move through %edx.  */
2157 .p2align 4
2158L(fwd_write_4bytes):
2159 mov -4(%rsi), %edx
2160 mov %edx, -4(%rdi)
2161 ret
2162
/* Forward tail handlers for 131, 115, ..., 35 and 19 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 35 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_19bytes)
   finishes with two 16-byte chunks at -19 and -16, which overlap by 13
   bytes; both are loaded before either is stored.  */
2163 .p2align 4
2164L(fwd_write_131bytes):
2165 lddqu -131(%rsi), %xmm0
2166 movdqu %xmm0, -131(%rdi)
2167L(fwd_write_115bytes):
2168 lddqu -115(%rsi), %xmm0
2169 movdqu %xmm0, -115(%rdi)
2170L(fwd_write_99bytes):
2171 lddqu -99(%rsi), %xmm0
2172 movdqu %xmm0, -99(%rdi)
2173L(fwd_write_83bytes):
2174 lddqu -83(%rsi), %xmm0
2175 movdqu %xmm0, -83(%rdi)
2176L(fwd_write_67bytes):
2177 lddqu -67(%rsi), %xmm0
2178 movdqu %xmm0, -67(%rdi)
2179L(fwd_write_51bytes):
2180 lddqu -51(%rsi), %xmm0
2181 movdqu %xmm0, -51(%rdi)
2182L(fwd_write_35bytes):
2183 lddqu -35(%rsi), %xmm0
2184 movdqu %xmm0, -35(%rdi)
2185L(fwd_write_19bytes):
2186 lddqu -19(%rsi), %xmm0
2187 lddqu -16(%rsi), %xmm1
2188 movdqu %xmm0, -19(%rdi)
2189 movdqu %xmm1, -16(%rdi)
2190 ret
2191
/* Forward tail handler for 3 bytes ending at %rsi/%rdi: two 2-byte
   moves at -3 and -2, overlapping by 1 byte.  Both loads are done
   before the stores.  Uses %dx and %cx as scratch.  */
2192 .p2align 4
2193L(fwd_write_3bytes):
2194 mov -3(%rsi), %dx
2195 mov -2(%rsi), %cx
2196 mov %dx, -3(%rdi)
2197 mov %cx, -2(%rdi)
2198 ret
2199
/* Forward tail handlers for 130, 114, ..., 34 and 18 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 34 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_18bytes)
   finishes with two 16-byte chunks at -18 and -16, which overlap by 14
   bytes; both are loaded before either is stored.  */
2200 .p2align 4
2201L(fwd_write_130bytes):
2202 lddqu -130(%rsi), %xmm0
2203 movdqu %xmm0, -130(%rdi)
2204L(fwd_write_114bytes):
2205 lddqu -114(%rsi), %xmm0
2206 movdqu %xmm0, -114(%rdi)
2207L(fwd_write_98bytes):
2208 lddqu -98(%rsi), %xmm0
2209 movdqu %xmm0, -98(%rdi)
2210L(fwd_write_82bytes):
2211 lddqu -82(%rsi), %xmm0
2212 movdqu %xmm0, -82(%rdi)
2213L(fwd_write_66bytes):
2214 lddqu -66(%rsi), %xmm0
2215 movdqu %xmm0, -66(%rdi)
2216L(fwd_write_50bytes):
2217 lddqu -50(%rsi), %xmm0
2218 movdqu %xmm0, -50(%rdi)
2219L(fwd_write_34bytes):
2220 lddqu -34(%rsi), %xmm0
2221 movdqu %xmm0, -34(%rdi)
2222L(fwd_write_18bytes):
2223 lddqu -18(%rsi), %xmm0
2224 lddqu -16(%rsi), %xmm1
2225 movdqu %xmm0, -18(%rdi)
2226 movdqu %xmm1, -16(%rdi)
2227 ret
2228
/* Forward tail handler for 2 bytes ending at %rsi/%rdi: a
   zero-extending 16-bit load into %edx followed by a 16-bit store.  */
2229 .p2align 4
2230L(fwd_write_2bytes):
2231 movzwl -2(%rsi), %edx
2232 mov %dx, -2(%rdi)
2233 ret
2234
/* Forward tail handlers for 129, 113, ..., 33 and 17 bytes.  %rsi and
   %rdi point just past the end of the data.  Labels down to 33 bytes
   copy one 16-byte chunk and fall through.  L(fwd_write_17bytes)
   finishes with two 16-byte chunks at -17 and -16, which overlap by 15
   bytes; both are loaded before either is stored.  */
2235 .p2align 4
2236L(fwd_write_129bytes):
2237 lddqu -129(%rsi), %xmm0
2238 movdqu %xmm0, -129(%rdi)
2239L(fwd_write_113bytes):
2240 lddqu -113(%rsi), %xmm0
2241 movdqu %xmm0, -113(%rdi)
2242L(fwd_write_97bytes):
2243 lddqu -97(%rsi), %xmm0
2244 movdqu %xmm0, -97(%rdi)
2245L(fwd_write_81bytes):
2246 lddqu -81(%rsi), %xmm0
2247 movdqu %xmm0, -81(%rdi)
2248L(fwd_write_65bytes):
2249 lddqu -65(%rsi), %xmm0
2250 movdqu %xmm0, -65(%rdi)
2251L(fwd_write_49bytes):
2252 lddqu -49(%rsi), %xmm0
2253 movdqu %xmm0, -49(%rdi)
2254L(fwd_write_33bytes):
2255 lddqu -33(%rsi), %xmm0
2256 movdqu %xmm0, -33(%rdi)
2257L(fwd_write_17bytes):
2258 lddqu -17(%rsi), %xmm0
2259 lddqu -16(%rsi), %xmm1
2260 movdqu %xmm0, -17(%rdi)
2261 movdqu %xmm1, -16(%rdi)
2262 ret
2263
/* Forward tail handler for the single byte just below %rsi/%rdi: a
   zero-extending byte load into %edx followed by a byte store.  */
2264 .p2align 4
2265L(fwd_write_1bytes):
2266 movzbl -1(%rsi), %edx
2267 mov %dl, -1(%rdi)
2268 ret
2269
/* Backward tail handlers for 128, 112, ..., 16 and 0 bytes (presumably
   reached through L(table_144_bytes_bwd), which is outside this
   chunk).  %rsi and %rdi point at the first byte of the data, so the
   handler for N bytes copies the highest 16-byte chunk, at offset
   N - 16, through %xmm0 and falls through to the label for 16 bytes
   fewer; L(bwd_write_0bytes) just returns.  */
2270 .p2align 4
2271L(bwd_write_128bytes):
2272 lddqu 112(%rsi), %xmm0
2273 movdqu %xmm0, 112(%rdi)
2274L(bwd_write_112bytes):
2275 lddqu 96(%rsi), %xmm0
2276 movdqu %xmm0, 96(%rdi)
2277L(bwd_write_96bytes):
2278 lddqu 80(%rsi), %xmm0
2279 movdqu %xmm0, 80(%rdi)
2280L(bwd_write_80bytes):
2281 lddqu 64(%rsi), %xmm0
2282 movdqu %xmm0, 64(%rdi)
2283L(bwd_write_64bytes):
2284 lddqu 48(%rsi), %xmm0
2285 movdqu %xmm0, 48(%rdi)
2286L(bwd_write_48bytes):
2287 lddqu 32(%rsi), %xmm0
2288 movdqu %xmm0, 32(%rdi)
2289L(bwd_write_32bytes):
2290 lddqu 16(%rsi), %xmm0
2291 movdqu %xmm0, 16(%rdi)
2292L(bwd_write_16bytes):
2293 lddqu (%rsi), %xmm0
2294 movdqu %xmm0, (%rdi)
2295L(bwd_write_0bytes):
2296 ret
2297
/* Backward tail handlers for 143, 127, ..., 47 and 31 bytes.  %rsi and
   %rdi point at the first byte of the data.  Labels down to 47 bytes
   copy the highest remaining 16-byte chunk and fall through.
   L(bwd_write_31bytes) finishes with two 16-byte chunks at 15 and 0,
   which overlap by 1 byte; both are loaded before either is stored.  */
2298 .p2align 4
2299L(bwd_write_143bytes):
2300 lddqu 127(%rsi), %xmm0
2301 movdqu %xmm0, 127(%rdi)
2302L(bwd_write_127bytes):
2303 lddqu 111(%rsi), %xmm0
2304 movdqu %xmm0, 111(%rdi)
2305L(bwd_write_111bytes):
2306 lddqu 95(%rsi), %xmm0
2307 movdqu %xmm0, 95(%rdi)
2308L(bwd_write_95bytes):
2309 lddqu 79(%rsi), %xmm0
2310 movdqu %xmm0, 79(%rdi)
2311L(bwd_write_79bytes):
2312 lddqu 63(%rsi), %xmm0
2313 movdqu %xmm0, 63(%rdi)
2314L(bwd_write_63bytes):
2315 lddqu 47(%rsi), %xmm0
2316 movdqu %xmm0, 47(%rdi)
2317L(bwd_write_47bytes):
2318 lddqu 31(%rsi), %xmm0
2319 movdqu %xmm0, 31(%rdi)
2320L(bwd_write_31bytes):
2321 lddqu 15(%rsi), %xmm0
2322 lddqu (%rsi), %xmm1
2323 movdqu %xmm0, 15(%rdi)
2324 movdqu %xmm1, (%rdi)
2325 ret
2326
2327
/* Backward tail handler for 15 bytes starting at %rsi/%rdi: two 8-byte
   moves at 7 and 0, overlapping by 1 byte.  Both loads are done before
   the stores.  Uses %rdx and %rcx as scratch.  */
2328 .p2align 4
2329L(bwd_write_15bytes):
2330 mov 7(%rsi), %rdx
2331 mov (%rsi), %rcx
2332 mov %rdx, 7(%rdi)
2333 mov %rcx, (%rdi)
2334 ret
2335
/* Backward tail handlers for 142, 126, ..., 46 and 30 bytes.  %rsi and
   %rdi point at the first byte of the data.  Labels down to 46 bytes
   copy the highest remaining 16-byte chunk and fall through.
   L(bwd_write_30bytes) finishes with two 16-byte chunks at 14 and 0,
   which overlap by 2 bytes; both are loaded before either is stored.  */
2336 .p2align 4
2337L(bwd_write_142bytes):
2338 lddqu 126(%rsi), %xmm0
2339 movdqu %xmm0, 126(%rdi)
2340L(bwd_write_126bytes):
2341 lddqu 110(%rsi), %xmm0
2342 movdqu %xmm0, 110(%rdi)
2343L(bwd_write_110bytes):
2344 lddqu 94(%rsi), %xmm0
2345 movdqu %xmm0, 94(%rdi)
2346L(bwd_write_94bytes):
2347 lddqu 78(%rsi), %xmm0
2348 movdqu %xmm0, 78(%rdi)
2349L(bwd_write_78bytes):
2350 lddqu 62(%rsi), %xmm0
2351 movdqu %xmm0, 62(%rdi)
2352L(bwd_write_62bytes):
2353 lddqu 46(%rsi), %xmm0
2354 movdqu %xmm0, 46(%rdi)
2355L(bwd_write_46bytes):
2356 lddqu 30(%rsi), %xmm0
2357 movdqu %xmm0, 30(%rdi)
2358L(bwd_write_30bytes):
2359 lddqu 14(%rsi), %xmm0
2360 lddqu (%rsi), %xmm1
2361 movdqu %xmm0, 14(%rdi)
2362 movdqu %xmm1, (%rdi)
2363 ret
2364
/* Backward tail handler for 14 bytes starting at %rsi/%rdi: two 8-byte
   moves at 6 and 0, overlapping by 2 bytes.  Both loads are done
   before the stores.  Uses %rdx and %rcx as scratch.  */
2365 .p2align 4
2366L(bwd_write_14bytes):
2367 mov 6(%rsi), %rdx
2368 mov (%rsi), %rcx
2369 mov %rdx, 6(%rdi)
2370 mov %rcx, (%rdi)
2371 ret
2372
/* Backward tail handlers for 141, 125, ..., 45 and 29 bytes.  %rsi and
   %rdi point at the first byte of the data.  Labels down to 45 bytes
   copy the highest remaining 16-byte chunk and fall through.
   L(bwd_write_29bytes) finishes with two 16-byte chunks at 13 and 0,
   which overlap by 3 bytes; both are loaded before either is stored.  */
2373 .p2align 4
2374L(bwd_write_141bytes):
2375 lddqu 125(%rsi), %xmm0
2376 movdqu %xmm0, 125(%rdi)
2377L(bwd_write_125bytes):
2378 lddqu 109(%rsi), %xmm0
2379 movdqu %xmm0, 109(%rdi)
2380L(bwd_write_109bytes):
2381 lddqu 93(%rsi), %xmm0
2382 movdqu %xmm0, 93(%rdi)
2383L(bwd_write_93bytes):
2384 lddqu 77(%rsi), %xmm0
2385 movdqu %xmm0, 77(%rdi)
2386L(bwd_write_77bytes):
2387 lddqu 61(%rsi), %xmm0
2388 movdqu %xmm0, 61(%rdi)
2389L(bwd_write_61bytes):
2390 lddqu 45(%rsi), %xmm0
2391 movdqu %xmm0, 45(%rdi)
2392L(bwd_write_45bytes):
2393 lddqu 29(%rsi), %xmm0
2394 movdqu %xmm0, 29(%rdi)
2395L(bwd_write_29bytes):
2396 lddqu 13(%rsi), %xmm0
2397 lddqu (%rsi), %xmm1
2398 movdqu %xmm0, 13(%rdi)
2399 movdqu %xmm1, (%rdi)
2400 ret
2401
/* Backward tail handler for 13 bytes starting at %rsi/%rdi: two 8-byte
   moves at 5 and 0, overlapping by 3 bytes.  Both loads are done
   before the stores.  Uses %rdx and %rcx as scratch.  */
2402 .p2align 4
2403L(bwd_write_13bytes):
2404 mov 5(%rsi), %rdx
2405 mov (%rsi), %rcx
2406 mov %rdx, 5(%rdi)
2407 mov %rcx, (%rdi)
2408 ret
2409
/* Backward tail handlers for 140, 124, ..., 44 and 28 bytes.  %rsi and
   %rdi point at the first byte of the data.  Labels down to 44 bytes
   copy the highest remaining 16-byte chunk and fall through.
   L(bwd_write_28bytes) finishes with two 16-byte chunks at 12 and 0,
   which overlap by 4 bytes; both are loaded before either is stored.  */
2410 .p2align 4
2411L(bwd_write_140bytes):
2412 lddqu 124(%rsi), %xmm0
2413 movdqu %xmm0, 124(%rdi)
2414L(bwd_write_124bytes):
2415 lddqu 108(%rsi), %xmm0
2416 movdqu %xmm0, 108(%rdi)
2417L(bwd_write_108bytes):
2418 lddqu 92(%rsi), %xmm0
2419 movdqu %xmm0, 92(%rdi)
2420L(bwd_write_92bytes):
2421 lddqu 76(%rsi), %xmm0
2422 movdqu %xmm0, 76(%rdi)
2423L(bwd_write_76bytes):
2424 lddqu 60(%rsi), %xmm0
2425 movdqu %xmm0, 60(%rdi)
2426L(bwd_write_60bytes):
2427 lddqu 44(%rsi), %xmm0
2428 movdqu %xmm0, 44(%rdi)
2429L(bwd_write_44bytes):
2430 lddqu 28(%rsi), %xmm0
2431 movdqu %xmm0, 28(%rdi)
2432L(bwd_write_28bytes):
2433 lddqu 12(%rsi), %xmm0
2434 lddqu (%rsi), %xmm1
2435 movdqu %xmm0, 12(%rdi)
2436 movdqu %xmm1, (%rdi)
2437 ret
2438
/* Backward tail handler for 12 bytes starting at %rsi/%rdi: two 8-byte
   moves at 4 and 0, overlapping by 4 bytes.  Both loads are done
   before the stores.  Uses %rdx and %rcx as scratch.  */
2439 .p2align 4
2440L(bwd_write_12bytes):
2441 mov 4(%rsi), %rdx
2442 mov (%rsi), %rcx
2443 mov %rdx, 4(%rdi)
2444 mov %rcx, (%rdi)
2445 ret
2446
/* Backward tail handlers for 139, 123, ..., 43 and 27 bytes.  %rsi and
   %rdi point at the first byte of the data.  Labels down to 43 bytes
   copy the highest remaining 16-byte chunk and fall through.
   L(bwd_write_27bytes) finishes with two 16-byte chunks at 11 and 0,
   which overlap by 5 bytes; both are loaded before either is stored.  */
2447 .p2align 4
2448L(bwd_write_139bytes):
2449 lddqu 123(%rsi), %xmm0
2450 movdqu %xmm0, 123(%rdi)
2451L(bwd_write_123bytes):
2452 lddqu 107(%rsi), %xmm0
2453 movdqu %xmm0, 107(%rdi)
2454L(bwd_write_107bytes):
2455 lddqu 91(%rsi), %xmm0
2456 movdqu %xmm0, 91(%rdi)
2457L(bwd_write_91bytes):
2458 lddqu 75(%rsi), %xmm0
2459 movdqu %xmm0, 75(%rdi)
2460L(bwd_write_75bytes):
2461 lddqu 59(%rsi), %xmm0
2462 movdqu %xmm0, 59(%rdi)
2463L(bwd_write_59bytes):
2464 lddqu 43(%rsi), %xmm0
2465 movdqu %xmm0, 43(%rdi)
2466L(bwd_write_43bytes):
2467 lddqu 27(%rsi), %xmm0
2468 movdqu %xmm0, 27(%rdi)
2469L(bwd_write_27bytes):
2470 lddqu 11(%rsi), %xmm0
2471 lddqu (%rsi), %xmm1
2472 movdqu %xmm0, 11(%rdi)
2473 movdqu %xmm1, (%rdi)
2474 ret
2475
/* Backward tail handler for 11 bytes starting at %rsi/%rdi: two 8-byte
   moves at 3 and 0, overlapping by 5 bytes.  Both loads are done
   before the stores.  Uses %rdx and %rcx as scratch.  */
2476 .p2align 4
2477L(bwd_write_11bytes):
2478 mov 3(%rsi), %rdx
2479 mov (%rsi), %rcx
2480 mov %rdx, 3(%rdi)
2481 mov %rcx, (%rdi)
2482 ret
2483
/* Backward tail handlers for 138, 122, ..., 42 and 26 bytes.  %rsi and
   %rdi point at the first byte of the data.  Labels down to 42 bytes
   copy the highest remaining 16-byte chunk and fall through.
   L(bwd_write_26bytes) finishes with two 16-byte chunks at 10 and 0,
   which overlap by 6 bytes; both are loaded before either is stored.  */
2484 .p2align 4
2485L(bwd_write_138bytes):
2486 lddqu 122(%rsi), %xmm0
2487 movdqu %xmm0, 122(%rdi)
2488L(bwd_write_122bytes):
2489 lddqu 106(%rsi), %xmm0
2490 movdqu %xmm0, 106(%rdi)
2491L(bwd_write_106bytes):
2492 lddqu 90(%rsi), %xmm0
2493 movdqu %xmm0, 90(%rdi)
2494L(bwd_write_90bytes):
2495 lddqu 74(%rsi), %xmm0
2496 movdqu %xmm0, 74(%rdi)
2497L(bwd_write_74bytes):
2498 lddqu 58(%rsi), %xmm0
2499 movdqu %xmm0, 58(%rdi)
2500L(bwd_write_58bytes):
2501 lddqu 42(%rsi), %xmm0
2502 movdqu %xmm0, 42(%rdi)
2503L(bwd_write_42bytes):
2504 lddqu 26(%rsi), %xmm0
2505 movdqu %xmm0, 26(%rdi)
2506L(bwd_write_26bytes):
2507 lddqu 10(%rsi), %xmm0
2508 lddqu (%rsi), %xmm1
2509 movdqu %xmm0, 10(%rdi)
2510 movdqu %xmm1, (%rdi)
2511 ret
2512
/* Backward tail handler for 10 bytes starting at %rsi/%rdi: two 8-byte
   moves at 2 and 0, overlapping by 6 bytes.  Both loads are done
   before the stores.  Uses %rdx and %rcx as scratch.  */
2513 .p2align 4
2514L(bwd_write_10bytes):
2515 mov 2(%rsi), %rdx
2516 mov (%rsi), %rcx
2517 mov %rdx, 2(%rdi)
2518 mov %rcx, (%rdi)
2519 ret
2520
/* Backward tail handlers for 137, 121, ..., 41 and 25 bytes.  %rsi and
   %rdi point at the first byte of the data.  Labels down to 41 bytes
   copy the highest remaining 16-byte chunk and fall through.
   L(bwd_write_25bytes) finishes with two 16-byte chunks at 9 and 0,
   which overlap by 7 bytes; both are loaded before either is stored.  */
2521 .p2align 4
2522L(bwd_write_137bytes):
2523 lddqu 121(%rsi), %xmm0
2524 movdqu %xmm0, 121(%rdi)
2525L(bwd_write_121bytes):
2526 lddqu 105(%rsi), %xmm0
2527 movdqu %xmm0, 105(%rdi)
2528L(bwd_write_105bytes):
2529 lddqu 89(%rsi), %xmm0
2530 movdqu %xmm0, 89(%rdi)
2531L(bwd_write_89bytes):
2532 lddqu 73(%rsi), %xmm0
2533 movdqu %xmm0, 73(%rdi)
2534L(bwd_write_73bytes):
2535 lddqu 57(%rsi), %xmm0
2536 movdqu %xmm0, 57(%rdi)
2537L(bwd_write_57bytes):
2538 lddqu 41(%rsi), %xmm0
2539 movdqu %xmm0, 41(%rdi)
2540L(bwd_write_41bytes):
2541 lddqu 25(%rsi), %xmm0
2542 movdqu %xmm0, 25(%rdi)
2543L(bwd_write_25bytes):
2544 lddqu 9(%rsi), %xmm0
2545 lddqu (%rsi), %xmm1
2546 movdqu %xmm0, 9(%rdi)
2547 movdqu %xmm1, (%rdi)
2548 ret
2549
/* Backward tail handler for 9 bytes starting at %rsi/%rdi: two 8-byte
   moves at 1 and 0, overlapping by 7 bytes.  Both loads are done
   before the stores.  Uses %rdx and %rcx as scratch.  */
2550 .p2align 4
2551L(bwd_write_9bytes):
2552 mov 1(%rsi), %rdx
2553 mov (%rsi), %rcx
2554 mov %rdx, 1(%rdi)
2555 mov %rcx, (%rdi)
2556 ret
2557
/* Backward tail handlers for 136, 120, ..., 40 and 24 bytes.  %rsi and
   %rdi point at the first byte of the data.  Labels down to 40 bytes
   copy the highest remaining 16-byte chunk and fall through.
   L(bwd_write_24bytes) finishes with two 16-byte chunks at 8 and 0,
   which overlap by 8 bytes; both are loaded before either is stored.  */
2558 .p2align 4
2559L(bwd_write_136bytes):
2560 lddqu 120(%rsi), %xmm0
2561 movdqu %xmm0, 120(%rdi)
2562L(bwd_write_120bytes):
2563 lddqu 104(%rsi), %xmm0
2564 movdqu %xmm0, 104(%rdi)
2565L(bwd_write_104bytes):
2566 lddqu 88(%rsi), %xmm0
2567 movdqu %xmm0, 88(%rdi)
2568L(bwd_write_88bytes):
2569 lddqu 72(%rsi), %xmm0
2570 movdqu %xmm0, 72(%rdi)
2571L(bwd_write_72bytes):
2572 lddqu 56(%rsi), %xmm0
2573 movdqu %xmm0, 56(%rdi)
2574L(bwd_write_56bytes):
2575 lddqu 40(%rsi), %xmm0
2576 movdqu %xmm0, 40(%rdi)
2577L(bwd_write_40bytes):
2578 lddqu 24(%rsi), %xmm0
2579 movdqu %xmm0, 24(%rdi)
2580L(bwd_write_24bytes):
2581 lddqu 8(%rsi), %xmm0
2582 lddqu (%rsi), %xmm1
2583 movdqu %xmm0, 8(%rdi)
2584 movdqu %xmm1, (%rdi)
2585 ret
2586
2587 .p2align 4
/* Tail copy of exactly 8 bytes: one 8-byte load from (%rsi) followed by
   one 8-byte store to (%rdi).  Clobbers %rdx.  */
2588L(bwd_write_8bytes):
2589 mov (%rsi), %rdx
2590 mov %rdx, (%rdi)
2591 ret
2592
2593 .p2align 4
/* Tail copy of 135, 119, 103, 87, 71, 55, 39 or 23 bytes from (%rsi) to
   (%rdi).  Each label moves the top 16 bytes of its size with an
   unaligned load/store pair and falls through to the label that is 16
   bytes smaller, so the copy proceeds from the highest offset down.
   L(bwd_write_23bytes) ends with two 16-byte moves at offsets 7 and 0;
   they overlap, and both are loaded before either is stored.
   Clobbers %xmm0 and %xmm1.  */
2594L(bwd_write_135bytes):
2595 lddqu 119(%rsi), %xmm0
2596 movdqu %xmm0, 119(%rdi)
2597L(bwd_write_119bytes):
2598 lddqu 103(%rsi), %xmm0
2599 movdqu %xmm0, 103(%rdi)
2600L(bwd_write_103bytes):
2601 lddqu 87(%rsi), %xmm0
2602 movdqu %xmm0, 87(%rdi)
2603L(bwd_write_87bytes):
2604 lddqu 71(%rsi), %xmm0
2605 movdqu %xmm0, 71(%rdi)
2606L(bwd_write_71bytes):
2607 lddqu 55(%rsi), %xmm0
2608 movdqu %xmm0, 55(%rdi)
2609L(bwd_write_55bytes):
2610 lddqu 39(%rsi), %xmm0
2611 movdqu %xmm0, 39(%rdi)
2612L(bwd_write_39bytes):
2613 lddqu 23(%rsi), %xmm0
2614 movdqu %xmm0, 23(%rdi)
2615L(bwd_write_23bytes):
2616 lddqu 7(%rsi), %xmm0
2617 lddqu (%rsi), %xmm1
2618 movdqu %xmm0, 7(%rdi)
2619 movdqu %xmm1, (%rdi)
2620 ret
2621
2622 .p2align 4
/* Tail copy of exactly 7 bytes from (%rsi) to (%rdi): two 4-byte moves
   covering bytes 3..6 and 0..3, which overlap in byte 3.  Both loads are
   done before either store.  Clobbers %edx and %ecx.  */
2623L(bwd_write_7bytes):
2624 mov 3(%rsi), %edx
2625 mov (%rsi), %ecx
2626 mov %edx, 3(%rdi)
2627 mov %ecx, (%rdi)
2628 ret
2629
2630 .p2align 4
/* Tail copy of 134, 118, 102, 86, 70, 54, 38 or 22 bytes from (%rsi) to
   (%rdi).  Each label moves the top 16 bytes of its size with an
   unaligned load/store pair and falls through to the label that is 16
   bytes smaller, so the copy proceeds from the highest offset down.
   L(bwd_write_22bytes) ends with two 16-byte moves at offsets 6 and 0;
   they overlap, and both are loaded before either is stored.
   Clobbers %xmm0 and %xmm1.  */
2631L(bwd_write_134bytes):
2632 lddqu 118(%rsi), %xmm0
2633 movdqu %xmm0, 118(%rdi)
2634L(bwd_write_118bytes):
2635 lddqu 102(%rsi), %xmm0
2636 movdqu %xmm0, 102(%rdi)
2637L(bwd_write_102bytes):
2638 lddqu 86(%rsi), %xmm0
2639 movdqu %xmm0, 86(%rdi)
2640L(bwd_write_86bytes):
2641 lddqu 70(%rsi), %xmm0
2642 movdqu %xmm0, 70(%rdi)
2643L(bwd_write_70bytes):
2644 lddqu 54(%rsi), %xmm0
2645 movdqu %xmm0, 54(%rdi)
2646L(bwd_write_54bytes):
2647 lddqu 38(%rsi), %xmm0
2648 movdqu %xmm0, 38(%rdi)
2649L(bwd_write_38bytes):
2650 lddqu 22(%rsi), %xmm0
2651 movdqu %xmm0, 22(%rdi)
2652L(bwd_write_22bytes):
2653 lddqu 6(%rsi), %xmm0
2654 lddqu (%rsi), %xmm1
2655 movdqu %xmm0, 6(%rdi)
2656 movdqu %xmm1, (%rdi)
2657 ret
2658
2659 .p2align 4
/* Tail copy of exactly 6 bytes from (%rsi) to (%rdi): two 4-byte moves
   covering bytes 2..5 and 0..3, which overlap in bytes 2..3.  Both loads
   are done before either store.  Clobbers %edx and %ecx.  */
2660L(bwd_write_6bytes):
2661 mov 2(%rsi), %edx
2662 mov (%rsi), %ecx
2663 mov %edx, 2(%rdi)
2664 mov %ecx, (%rdi)
2665 ret
2666
2667 .p2align 4
/* Tail copy of 133, 117, 101, 85, 69, 53, 37 or 21 bytes from (%rsi) to
   (%rdi).  Each label moves the top 16 bytes of its size with an
   unaligned load/store pair and falls through to the label that is 16
   bytes smaller, so the copy proceeds from the highest offset down.
   L(bwd_write_21bytes) ends with two 16-byte moves at offsets 5 and 0;
   they overlap, and both are loaded before either is stored.
   Clobbers %xmm0 and %xmm1.  */
2668L(bwd_write_133bytes):
2669 lddqu 117(%rsi), %xmm0
2670 movdqu %xmm0, 117(%rdi)
2671L(bwd_write_117bytes):
2672 lddqu 101(%rsi), %xmm0
2673 movdqu %xmm0, 101(%rdi)
2674L(bwd_write_101bytes):
2675 lddqu 85(%rsi), %xmm0
2676 movdqu %xmm0, 85(%rdi)
2677L(bwd_write_85bytes):
2678 lddqu 69(%rsi), %xmm0
2679 movdqu %xmm0, 69(%rdi)
2680L(bwd_write_69bytes):
2681 lddqu 53(%rsi), %xmm0
2682 movdqu %xmm0, 53(%rdi)
2683L(bwd_write_53bytes):
2684 lddqu 37(%rsi), %xmm0
2685 movdqu %xmm0, 37(%rdi)
2686L(bwd_write_37bytes):
2687 lddqu 21(%rsi), %xmm0
2688 movdqu %xmm0, 21(%rdi)
2689L(bwd_write_21bytes):
2690 lddqu 5(%rsi), %xmm0
2691 lddqu (%rsi), %xmm1
2692 movdqu %xmm0, 5(%rdi)
2693 movdqu %xmm1, (%rdi)
2694 ret
2695
2696 .p2align 4
/* Tail copy of exactly 5 bytes from (%rsi) to (%rdi): two 4-byte moves
   covering bytes 1..4 and 0..3, which overlap in bytes 1..3.  Both loads
   are done before either store.  Clobbers %edx and %ecx.  */
2697L(bwd_write_5bytes):
2698 mov 1(%rsi), %edx
2699 mov (%rsi), %ecx
2700 mov %edx, 1(%rdi)
2701 mov %ecx, (%rdi)
2702 ret
2703
2704 .p2align 4
/* Tail copy of 132, 116, 100, 84, 68, 52, 36 or 20 bytes from (%rsi) to
   (%rdi).  Each label moves the top 16 bytes of its size with an
   unaligned load/store pair and falls through to the label that is 16
   bytes smaller, so the copy proceeds from the highest offset down.
   L(bwd_write_20bytes) ends with two 16-byte moves at offsets 4 and 0;
   they overlap, and both are loaded before either is stored.
   Clobbers %xmm0 and %xmm1.  */
2705L(bwd_write_132bytes):
2706 lddqu 116(%rsi), %xmm0
2707 movdqu %xmm0, 116(%rdi)
2708L(bwd_write_116bytes):
2709 lddqu 100(%rsi), %xmm0
2710 movdqu %xmm0, 100(%rdi)
2711L(bwd_write_100bytes):
2712 lddqu 84(%rsi), %xmm0
2713 movdqu %xmm0, 84(%rdi)
2714L(bwd_write_84bytes):
2715 lddqu 68(%rsi), %xmm0
2716 movdqu %xmm0, 68(%rdi)
2717L(bwd_write_68bytes):
2718 lddqu 52(%rsi), %xmm0
2719 movdqu %xmm0, 52(%rdi)
2720L(bwd_write_52bytes):
2721 lddqu 36(%rsi), %xmm0
2722 movdqu %xmm0, 36(%rdi)
2723L(bwd_write_36bytes):
2724 lddqu 20(%rsi), %xmm0
2725 movdqu %xmm0, 20(%rdi)
2726L(bwd_write_20bytes):
2727 lddqu 4(%rsi), %xmm0
2728 lddqu (%rsi), %xmm1
2729 movdqu %xmm0, 4(%rdi)
2730 movdqu %xmm1, (%rdi)
2731 ret
2732
2733 .p2align 4
/* Tail copy of exactly 4 bytes: one 4-byte load from (%rsi) followed by
   one 4-byte store to (%rdi).  Clobbers %edx.  */
2734L(bwd_write_4bytes):
2735 mov (%rsi), %edx
2736 mov %edx, (%rdi)
2737 ret
2738
2739 .p2align 4
/* Tail copy of 131, 115, 99, 83, 67, 51, 35 or 19 bytes from (%rsi) to
   (%rdi).  Each label moves the top 16 bytes of its size with an
   unaligned load/store pair and falls through to the label that is 16
   bytes smaller, so the copy proceeds from the highest offset down.
   L(bwd_write_19bytes) ends with two 16-byte moves at offsets 3 and 0;
   they overlap, and both are loaded before either is stored.
   Clobbers %xmm0 and %xmm1.  */
2740L(bwd_write_131bytes):
2741 lddqu 115(%rsi), %xmm0
2742 movdqu %xmm0, 115(%rdi)
2743L(bwd_write_115bytes):
2744 lddqu 99(%rsi), %xmm0
2745 movdqu %xmm0, 99(%rdi)
2746L(bwd_write_99bytes):
2747 lddqu 83(%rsi), %xmm0
2748 movdqu %xmm0, 83(%rdi)
2749L(bwd_write_83bytes):
2750 lddqu 67(%rsi), %xmm0
2751 movdqu %xmm0, 67(%rdi)
2752L(bwd_write_67bytes):
2753 lddqu 51(%rsi), %xmm0
2754 movdqu %xmm0, 51(%rdi)
2755L(bwd_write_51bytes):
2756 lddqu 35(%rsi), %xmm0
2757 movdqu %xmm0, 35(%rdi)
2758L(bwd_write_35bytes):
2759 lddqu 19(%rsi), %xmm0
2760 movdqu %xmm0, 19(%rdi)
2761L(bwd_write_19bytes):
2762 lddqu 3(%rsi), %xmm0
2763 lddqu (%rsi), %xmm1
2764 movdqu %xmm0, 3(%rdi)
2765 movdqu %xmm1, (%rdi)
2766 ret
2767
2768 .p2align 4
/* Tail copy of exactly 3 bytes from (%rsi) to (%rdi): two 2-byte moves
   covering bytes 1..2 and 0..1, which overlap in byte 1.  Both loads are
   done before either store.  Writes only the low 16 bits of %rdx and
   %rcx.  */
2769L(bwd_write_3bytes):
2770 mov 1(%rsi), %dx
2771 mov (%rsi), %cx
2772 mov %dx, 1(%rdi)
2773 mov %cx, (%rdi)
2774 ret
2775
2776 .p2align 4
/* Tail copy of 130, 114, 98, 82, 66, 50, 34 or 18 bytes from (%rsi) to
   (%rdi).  Each label moves the top 16 bytes of its size with an
   unaligned load/store pair and falls through to the label that is 16
   bytes smaller, so the copy proceeds from the highest offset down.
   L(bwd_write_18bytes) ends with two 16-byte moves at offsets 2 and 0;
   they overlap, and both are loaded before either is stored.
   Clobbers %xmm0 and %xmm1.  */
2777L(bwd_write_130bytes):
2778 lddqu 114(%rsi), %xmm0
2779 movdqu %xmm0, 114(%rdi)
2780L(bwd_write_114bytes):
2781 lddqu 98(%rsi), %xmm0
2782 movdqu %xmm0, 98(%rdi)
2783L(bwd_write_98bytes):
2784 lddqu 82(%rsi), %xmm0
2785 movdqu %xmm0, 82(%rdi)
2786L(bwd_write_82bytes):
2787 lddqu 66(%rsi), %xmm0
2788 movdqu %xmm0, 66(%rdi)
2789L(bwd_write_66bytes):
2790 lddqu 50(%rsi), %xmm0
2791 movdqu %xmm0, 50(%rdi)
2792L(bwd_write_50bytes):
2793 lddqu 34(%rsi), %xmm0
2794 movdqu %xmm0, 34(%rdi)
2795L(bwd_write_34bytes):
2796 lddqu 18(%rsi), %xmm0
2797 movdqu %xmm0, 18(%rdi)
2798L(bwd_write_18bytes):
2799 lddqu 2(%rsi), %xmm0
2800 lddqu (%rsi), %xmm1
2801 movdqu %xmm0, 2(%rdi)
2802 movdqu %xmm1, (%rdi)
2803 ret
2804
2805 .p2align 4
/* Tail copy of exactly 2 bytes: the 16-bit value at (%rsi) is loaded
   zero-extended into %edx and its low 16 bits are stored to (%rdi).
   Clobbers %edx.  */
2806L(bwd_write_2bytes):
2807 movzwl (%rsi), %edx
2808 mov %dx, (%rdi)
2809 ret
2810
2811 .p2align 4
/* Tail copy of 129, 113, 97, 81, 65, 49, 33 or 17 bytes from (%rsi) to
   (%rdi).  Each label moves the top 16 bytes of its size with an
   unaligned load/store pair and falls through to the label that is 16
   bytes smaller, so the copy proceeds from the highest offset down.
   L(bwd_write_17bytes) ends with two 16-byte moves at offsets 1 and 0;
   they overlap, and both are loaded before either is stored.
   Clobbers %xmm0 and %xmm1.  */
2812L(bwd_write_129bytes):
2813 lddqu 113(%rsi), %xmm0
2814 movdqu %xmm0, 113(%rdi)
2815L(bwd_write_113bytes):
2816 lddqu 97(%rsi), %xmm0
2817 movdqu %xmm0, 97(%rdi)
2818L(bwd_write_97bytes):
2819 lddqu 81(%rsi), %xmm0
2820 movdqu %xmm0, 81(%rdi)
2821L(bwd_write_81bytes):
2822 lddqu 65(%rsi), %xmm0
2823 movdqu %xmm0, 65(%rdi)
2824L(bwd_write_65bytes):
2825 lddqu 49(%rsi), %xmm0
2826 movdqu %xmm0, 49(%rdi)
2827L(bwd_write_49bytes):
2828 lddqu 33(%rsi), %xmm0
2829 movdqu %xmm0, 33(%rdi)
2830L(bwd_write_33bytes):
2831 lddqu 17(%rsi), %xmm0
2832 movdqu %xmm0, 17(%rdi)
2833L(bwd_write_17bytes):
2834 lddqu 1(%rsi), %xmm0
2835 lddqu (%rsi), %xmm1
2836 movdqu %xmm0, 1(%rdi)
2837 movdqu %xmm1, (%rdi)
2838 ret
2839
2840 .p2align 4
/* Tail copy of exactly 1 byte: the byte at (%rsi) is loaded
   zero-extended into %edx and %dl is stored to (%rdi).
   Clobbers %edx.  */
2841L(bwd_write_1bytes):
2842 movzbl (%rsi), %edx
2843 mov %dl, (%rdi)
2844 ret
2845
2846END (MEMCPY)
2847
2848 .section .rodata.ssse3,"a",@progbits
2849 .p2align 3
/* Jump table for the backward tail copy, 144 entries.  Entry N
   (N = 0 .. 143) is the 32-bit value L(bwd_write_Nbytes) minus the
   address of this table (see JMPTBL), i.e. the relative-offset format
   that BRANCH_TO_JMPTBL_ENTRY sign-extends and adds back to the table
   address.  The entries must stay in ascending order of N.  */
2850L(table_144_bytes_bwd):
2851 .int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
2852 .int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
2853 .int JMPTBL (L(bwd_write_2bytes), L(table_144_bytes_bwd))
2854 .int JMPTBL (L(bwd_write_3bytes), L(table_144_bytes_bwd))
2855 .int JMPTBL (L(bwd_write_4bytes), L(table_144_bytes_bwd))
2856 .int JMPTBL (L(bwd_write_5bytes), L(table_144_bytes_bwd))
2857 .int JMPTBL (L(bwd_write_6bytes), L(table_144_bytes_bwd))
2858 .int JMPTBL (L(bwd_write_7bytes), L(table_144_bytes_bwd))
2859 .int JMPTBL (L(bwd_write_8bytes), L(table_144_bytes_bwd))
2860 .int JMPTBL (L(bwd_write_9bytes), L(table_144_bytes_bwd))
2861 .int JMPTBL (L(bwd_write_10bytes), L(table_144_bytes_bwd))
2862 .int JMPTBL (L(bwd_write_11bytes), L(table_144_bytes_bwd))
2863 .int JMPTBL (L(bwd_write_12bytes), L(table_144_bytes_bwd))
2864 .int JMPTBL (L(bwd_write_13bytes), L(table_144_bytes_bwd))
2865 .int JMPTBL (L(bwd_write_14bytes), L(table_144_bytes_bwd))
2866 .int JMPTBL (L(bwd_write_15bytes), L(table_144_bytes_bwd))
2867 .int JMPTBL (L(bwd_write_16bytes), L(table_144_bytes_bwd))
2868 .int JMPTBL (L(bwd_write_17bytes), L(table_144_bytes_bwd))
2869 .int JMPTBL (L(bwd_write_18bytes), L(table_144_bytes_bwd))
2870 .int JMPTBL (L(bwd_write_19bytes), L(table_144_bytes_bwd))
2871 .int JMPTBL (L(bwd_write_20bytes), L(table_144_bytes_bwd))
2872 .int JMPTBL (L(bwd_write_21bytes), L(table_144_bytes_bwd))
2873 .int JMPTBL (L(bwd_write_22bytes), L(table_144_bytes_bwd))
2874 .int JMPTBL (L(bwd_write_23bytes), L(table_144_bytes_bwd))
2875 .int JMPTBL (L(bwd_write_24bytes), L(table_144_bytes_bwd))
2876 .int JMPTBL (L(bwd_write_25bytes), L(table_144_bytes_bwd))
2877 .int JMPTBL (L(bwd_write_26bytes), L(table_144_bytes_bwd))
2878 .int JMPTBL (L(bwd_write_27bytes), L(table_144_bytes_bwd))
2879 .int JMPTBL (L(bwd_write_28bytes), L(table_144_bytes_bwd))
2880 .int JMPTBL (L(bwd_write_29bytes), L(table_144_bytes_bwd))
2881 .int JMPTBL (L(bwd_write_30bytes), L(table_144_bytes_bwd))
2882 .int JMPTBL (L(bwd_write_31bytes), L(table_144_bytes_bwd))
2883 .int JMPTBL (L(bwd_write_32bytes), L(table_144_bytes_bwd))
2884 .int JMPTBL (L(bwd_write_33bytes), L(table_144_bytes_bwd))
2885 .int JMPTBL (L(bwd_write_34bytes), L(table_144_bytes_bwd))
2886 .int JMPTBL (L(bwd_write_35bytes), L(table_144_bytes_bwd))
2887 .int JMPTBL (L(bwd_write_36bytes), L(table_144_bytes_bwd))
2888 .int JMPTBL (L(bwd_write_37bytes), L(table_144_bytes_bwd))
2889 .int JMPTBL (L(bwd_write_38bytes), L(table_144_bytes_bwd))
2890 .int JMPTBL (L(bwd_write_39bytes), L(table_144_bytes_bwd))
2891 .int JMPTBL (L(bwd_write_40bytes), L(table_144_bytes_bwd))
2892 .int JMPTBL (L(bwd_write_41bytes), L(table_144_bytes_bwd))
2893 .int JMPTBL (L(bwd_write_42bytes), L(table_144_bytes_bwd))
2894 .int JMPTBL (L(bwd_write_43bytes), L(table_144_bytes_bwd))
2895 .int JMPTBL (L(bwd_write_44bytes), L(table_144_bytes_bwd))
2896 .int JMPTBL (L(bwd_write_45bytes), L(table_144_bytes_bwd))
2897 .int JMPTBL (L(bwd_write_46bytes), L(table_144_bytes_bwd))
2898 .int JMPTBL (L(bwd_write_47bytes), L(table_144_bytes_bwd))
2899 .int JMPTBL (L(bwd_write_48bytes), L(table_144_bytes_bwd))
2900 .int JMPTBL (L(bwd_write_49bytes), L(table_144_bytes_bwd))
2901 .int JMPTBL (L(bwd_write_50bytes), L(table_144_bytes_bwd))
2902 .int JMPTBL (L(bwd_write_51bytes), L(table_144_bytes_bwd))
2903 .int JMPTBL (L(bwd_write_52bytes), L(table_144_bytes_bwd))
2904 .int JMPTBL (L(bwd_write_53bytes), L(table_144_bytes_bwd))
2905 .int JMPTBL (L(bwd_write_54bytes), L(table_144_bytes_bwd))
2906 .int JMPTBL (L(bwd_write_55bytes), L(table_144_bytes_bwd))
2907 .int JMPTBL (L(bwd_write_56bytes), L(table_144_bytes_bwd))
2908 .int JMPTBL (L(bwd_write_57bytes), L(table_144_bytes_bwd))
2909 .int JMPTBL (L(bwd_write_58bytes), L(table_144_bytes_bwd))
2910 .int JMPTBL (L(bwd_write_59bytes), L(table_144_bytes_bwd))
2911 .int JMPTBL (L(bwd_write_60bytes), L(table_144_bytes_bwd))
2912 .int JMPTBL (L(bwd_write_61bytes), L(table_144_bytes_bwd))
2913 .int JMPTBL (L(bwd_write_62bytes), L(table_144_bytes_bwd))
2914 .int JMPTBL (L(bwd_write_63bytes), L(table_144_bytes_bwd))
2915 .int JMPTBL (L(bwd_write_64bytes), L(table_144_bytes_bwd))
2916 .int JMPTBL (L(bwd_write_65bytes), L(table_144_bytes_bwd))
2917 .int JMPTBL (L(bwd_write_66bytes), L(table_144_bytes_bwd))
2918 .int JMPTBL (L(bwd_write_67bytes), L(table_144_bytes_bwd))
2919 .int JMPTBL (L(bwd_write_68bytes), L(table_144_bytes_bwd))
2920 .int JMPTBL (L(bwd_write_69bytes), L(table_144_bytes_bwd))
2921 .int JMPTBL (L(bwd_write_70bytes), L(table_144_bytes_bwd))
2922 .int JMPTBL (L(bwd_write_71bytes), L(table_144_bytes_bwd))
2923 .int JMPTBL (L(bwd_write_72bytes), L(table_144_bytes_bwd))
2924 .int JMPTBL (L(bwd_write_73bytes), L(table_144_bytes_bwd))
2925 .int JMPTBL (L(bwd_write_74bytes), L(table_144_bytes_bwd))
2926 .int JMPTBL (L(bwd_write_75bytes), L(table_144_bytes_bwd))
2927 .int JMPTBL (L(bwd_write_76bytes), L(table_144_bytes_bwd))
2928 .int JMPTBL (L(bwd_write_77bytes), L(table_144_bytes_bwd))
2929 .int JMPTBL (L(bwd_write_78bytes), L(table_144_bytes_bwd))
2930 .int JMPTBL (L(bwd_write_79bytes), L(table_144_bytes_bwd))
2931 .int JMPTBL (L(bwd_write_80bytes), L(table_144_bytes_bwd))
2932 .int JMPTBL (L(bwd_write_81bytes), L(table_144_bytes_bwd))
2933 .int JMPTBL (L(bwd_write_82bytes), L(table_144_bytes_bwd))
2934 .int JMPTBL (L(bwd_write_83bytes), L(table_144_bytes_bwd))
2935 .int JMPTBL (L(bwd_write_84bytes), L(table_144_bytes_bwd))
2936 .int JMPTBL (L(bwd_write_85bytes), L(table_144_bytes_bwd))
2937 .int JMPTBL (L(bwd_write_86bytes), L(table_144_bytes_bwd))
2938 .int JMPTBL (L(bwd_write_87bytes), L(table_144_bytes_bwd))
2939 .int JMPTBL (L(bwd_write_88bytes), L(table_144_bytes_bwd))
2940 .int JMPTBL (L(bwd_write_89bytes), L(table_144_bytes_bwd))
2941 .int JMPTBL (L(bwd_write_90bytes), L(table_144_bytes_bwd))
2942 .int JMPTBL (L(bwd_write_91bytes), L(table_144_bytes_bwd))
2943 .int JMPTBL (L(bwd_write_92bytes), L(table_144_bytes_bwd))
2944 .int JMPTBL (L(bwd_write_93bytes), L(table_144_bytes_bwd))
2945 .int JMPTBL (L(bwd_write_94bytes), L(table_144_bytes_bwd))
2946 .int JMPTBL (L(bwd_write_95bytes), L(table_144_bytes_bwd))
2947 .int JMPTBL (L(bwd_write_96bytes), L(table_144_bytes_bwd))
2948 .int JMPTBL (L(bwd_write_97bytes), L(table_144_bytes_bwd))
2949 .int JMPTBL (L(bwd_write_98bytes), L(table_144_bytes_bwd))
2950 .int JMPTBL (L(bwd_write_99bytes), L(table_144_bytes_bwd))
2951 .int JMPTBL (L(bwd_write_100bytes), L(table_144_bytes_bwd))
2952 .int JMPTBL (L(bwd_write_101bytes), L(table_144_bytes_bwd))
2953 .int JMPTBL (L(bwd_write_102bytes), L(table_144_bytes_bwd))
2954 .int JMPTBL (L(bwd_write_103bytes), L(table_144_bytes_bwd))
2955 .int JMPTBL (L(bwd_write_104bytes), L(table_144_bytes_bwd))
2956 .int JMPTBL (L(bwd_write_105bytes), L(table_144_bytes_bwd))
2957 .int JMPTBL (L(bwd_write_106bytes), L(table_144_bytes_bwd))
2958 .int JMPTBL (L(bwd_write_107bytes), L(table_144_bytes_bwd))
2959 .int JMPTBL (L(bwd_write_108bytes), L(table_144_bytes_bwd))
2960 .int JMPTBL (L(bwd_write_109bytes), L(table_144_bytes_bwd))
2961 .int JMPTBL (L(bwd_write_110bytes), L(table_144_bytes_bwd))
2962 .int JMPTBL (L(bwd_write_111bytes), L(table_144_bytes_bwd))
2963 .int JMPTBL (L(bwd_write_112bytes), L(table_144_bytes_bwd))
2964 .int JMPTBL (L(bwd_write_113bytes), L(table_144_bytes_bwd))
2965 .int JMPTBL (L(bwd_write_114bytes), L(table_144_bytes_bwd))
2966 .int JMPTBL (L(bwd_write_115bytes), L(table_144_bytes_bwd))
2967 .int JMPTBL (L(bwd_write_116bytes), L(table_144_bytes_bwd))
2968 .int JMPTBL (L(bwd_write_117bytes), L(table_144_bytes_bwd))
2969 .int JMPTBL (L(bwd_write_118bytes), L(table_144_bytes_bwd))
2970 .int JMPTBL (L(bwd_write_119bytes), L(table_144_bytes_bwd))
2971 .int JMPTBL (L(bwd_write_120bytes), L(table_144_bytes_bwd))
2972 .int JMPTBL (L(bwd_write_121bytes), L(table_144_bytes_bwd))
2973 .int JMPTBL (L(bwd_write_122bytes), L(table_144_bytes_bwd))
2974 .int JMPTBL (L(bwd_write_123bytes), L(table_144_bytes_bwd))
2975 .int JMPTBL (L(bwd_write_124bytes), L(table_144_bytes_bwd))
2976 .int JMPTBL (L(bwd_write_125bytes), L(table_144_bytes_bwd))
2977 .int JMPTBL (L(bwd_write_126bytes), L(table_144_bytes_bwd))
2978 .int JMPTBL (L(bwd_write_127bytes), L(table_144_bytes_bwd))
2979 .int JMPTBL (L(bwd_write_128bytes), L(table_144_bytes_bwd))
2980 .int JMPTBL (L(bwd_write_129bytes), L(table_144_bytes_bwd))
2981 .int JMPTBL (L(bwd_write_130bytes), L(table_144_bytes_bwd))
2982 .int JMPTBL (L(bwd_write_131bytes), L(table_144_bytes_bwd))
2983 .int JMPTBL (L(bwd_write_132bytes), L(table_144_bytes_bwd))
2984 .int JMPTBL (L(bwd_write_133bytes), L(table_144_bytes_bwd))
2985 .int JMPTBL (L(bwd_write_134bytes), L(table_144_bytes_bwd))
2986 .int JMPTBL (L(bwd_write_135bytes), L(table_144_bytes_bwd))
2987 .int JMPTBL (L(bwd_write_136bytes), L(table_144_bytes_bwd))
2988 .int JMPTBL (L(bwd_write_137bytes), L(table_144_bytes_bwd))
2989 .int JMPTBL (L(bwd_write_138bytes), L(table_144_bytes_bwd))
2990 .int JMPTBL (L(bwd_write_139bytes), L(table_144_bytes_bwd))
2991 .int JMPTBL (L(bwd_write_140bytes), L(table_144_bytes_bwd))
2992 .int JMPTBL (L(bwd_write_141bytes), L(table_144_bytes_bwd))
2993 .int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
2994 .int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
2995
2996 .p2align 3
/* Jump table for the forward tail copy, 144 entries.  Entry N
   (N = 0 .. 143) is the 32-bit value L(fwd_write_Nbytes) minus the
   address of this table (see JMPTBL), i.e. the relative-offset format
   that BRANCH_TO_JMPTBL_ENTRY sign-extends and adds back to the table
   address.  The entries must stay in ascending order of N.  */
2997L(table_144_bytes_fwd):
2998 .int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
2999 .int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
3000 .int JMPTBL (L(fwd_write_2bytes), L(table_144_bytes_fwd))
3001 .int JMPTBL (L(fwd_write_3bytes), L(table_144_bytes_fwd))
3002 .int JMPTBL (L(fwd_write_4bytes), L(table_144_bytes_fwd))
3003 .int JMPTBL (L(fwd_write_5bytes), L(table_144_bytes_fwd))
3004 .int JMPTBL (L(fwd_write_6bytes), L(table_144_bytes_fwd))
3005 .int JMPTBL (L(fwd_write_7bytes), L(table_144_bytes_fwd))
3006 .int JMPTBL (L(fwd_write_8bytes), L(table_144_bytes_fwd))
3007 .int JMPTBL (L(fwd_write_9bytes), L(table_144_bytes_fwd))
3008 .int JMPTBL (L(fwd_write_10bytes), L(table_144_bytes_fwd))
3009 .int JMPTBL (L(fwd_write_11bytes), L(table_144_bytes_fwd))
3010 .int JMPTBL (L(fwd_write_12bytes), L(table_144_bytes_fwd))
3011 .int JMPTBL (L(fwd_write_13bytes), L(table_144_bytes_fwd))
3012 .int JMPTBL (L(fwd_write_14bytes), L(table_144_bytes_fwd))
3013 .int JMPTBL (L(fwd_write_15bytes), L(table_144_bytes_fwd))
3014 .int JMPTBL (L(fwd_write_16bytes), L(table_144_bytes_fwd))
3015 .int JMPTBL (L(fwd_write_17bytes), L(table_144_bytes_fwd))
3016 .int JMPTBL (L(fwd_write_18bytes), L(table_144_bytes_fwd))
3017 .int JMPTBL (L(fwd_write_19bytes), L(table_144_bytes_fwd))
3018 .int JMPTBL (L(fwd_write_20bytes), L(table_144_bytes_fwd))
3019 .int JMPTBL (L(fwd_write_21bytes), L(table_144_bytes_fwd))
3020 .int JMPTBL (L(fwd_write_22bytes), L(table_144_bytes_fwd))
3021 .int JMPTBL (L(fwd_write_23bytes), L(table_144_bytes_fwd))
3022 .int JMPTBL (L(fwd_write_24bytes), L(table_144_bytes_fwd))
3023 .int JMPTBL (L(fwd_write_25bytes), L(table_144_bytes_fwd))
3024 .int JMPTBL (L(fwd_write_26bytes), L(table_144_bytes_fwd))
3025 .int JMPTBL (L(fwd_write_27bytes), L(table_144_bytes_fwd))
3026 .int JMPTBL (L(fwd_write_28bytes), L(table_144_bytes_fwd))
3027 .int JMPTBL (L(fwd_write_29bytes), L(table_144_bytes_fwd))
3028 .int JMPTBL (L(fwd_write_30bytes), L(table_144_bytes_fwd))
3029 .int JMPTBL (L(fwd_write_31bytes), L(table_144_bytes_fwd))
3030 .int JMPTBL (L(fwd_write_32bytes), L(table_144_bytes_fwd))
3031 .int JMPTBL (L(fwd_write_33bytes), L(table_144_bytes_fwd))
3032 .int JMPTBL (L(fwd_write_34bytes), L(table_144_bytes_fwd))
3033 .int JMPTBL (L(fwd_write_35bytes), L(table_144_bytes_fwd))
3034 .int JMPTBL (L(fwd_write_36bytes), L(table_144_bytes_fwd))
3035 .int JMPTBL (L(fwd_write_37bytes), L(table_144_bytes_fwd))
3036 .int JMPTBL (L(fwd_write_38bytes), L(table_144_bytes_fwd))
3037 .int JMPTBL (L(fwd_write_39bytes), L(table_144_bytes_fwd))
3038 .int JMPTBL (L(fwd_write_40bytes), L(table_144_bytes_fwd))
3039 .int JMPTBL (L(fwd_write_41bytes), L(table_144_bytes_fwd))
3040 .int JMPTBL (L(fwd_write_42bytes), L(table_144_bytes_fwd))
3041 .int JMPTBL (L(fwd_write_43bytes), L(table_144_bytes_fwd))
3042 .int JMPTBL (L(fwd_write_44bytes), L(table_144_bytes_fwd))
3043 .int JMPTBL (L(fwd_write_45bytes), L(table_144_bytes_fwd))
3044 .int JMPTBL (L(fwd_write_46bytes), L(table_144_bytes_fwd))
3045 .int JMPTBL (L(fwd_write_47bytes), L(table_144_bytes_fwd))
3046 .int JMPTBL (L(fwd_write_48bytes), L(table_144_bytes_fwd))
3047 .int JMPTBL (L(fwd_write_49bytes), L(table_144_bytes_fwd))
3048 .int JMPTBL (L(fwd_write_50bytes), L(table_144_bytes_fwd))
3049 .int JMPTBL (L(fwd_write_51bytes), L(table_144_bytes_fwd))
3050 .int JMPTBL (L(fwd_write_52bytes), L(table_144_bytes_fwd))
3051 .int JMPTBL (L(fwd_write_53bytes), L(table_144_bytes_fwd))
3052 .int JMPTBL (L(fwd_write_54bytes), L(table_144_bytes_fwd))
3053 .int JMPTBL (L(fwd_write_55bytes), L(table_144_bytes_fwd))
3054 .int JMPTBL (L(fwd_write_56bytes), L(table_144_bytes_fwd))
3055 .int JMPTBL (L(fwd_write_57bytes), L(table_144_bytes_fwd))
3056 .int JMPTBL (L(fwd_write_58bytes), L(table_144_bytes_fwd))
3057 .int JMPTBL (L(fwd_write_59bytes), L(table_144_bytes_fwd))
3058 .int JMPTBL (L(fwd_write_60bytes), L(table_144_bytes_fwd))
3059 .int JMPTBL (L(fwd_write_61bytes), L(table_144_bytes_fwd))
3060 .int JMPTBL (L(fwd_write_62bytes), L(table_144_bytes_fwd))
3061 .int JMPTBL (L(fwd_write_63bytes), L(table_144_bytes_fwd))
3062 .int JMPTBL (L(fwd_write_64bytes), L(table_144_bytes_fwd))
3063 .int JMPTBL (L(fwd_write_65bytes), L(table_144_bytes_fwd))
3064 .int JMPTBL (L(fwd_write_66bytes), L(table_144_bytes_fwd))
3065 .int JMPTBL (L(fwd_write_67bytes), L(table_144_bytes_fwd))
3066 .int JMPTBL (L(fwd_write_68bytes), L(table_144_bytes_fwd))
3067 .int JMPTBL (L(fwd_write_69bytes), L(table_144_bytes_fwd))
3068 .int JMPTBL (L(fwd_write_70bytes), L(table_144_bytes_fwd))
3069 .int JMPTBL (L(fwd_write_71bytes), L(table_144_bytes_fwd))
3070 .int JMPTBL (L(fwd_write_72bytes), L(table_144_bytes_fwd))
3071 .int JMPTBL (L(fwd_write_73bytes), L(table_144_bytes_fwd))
3072 .int JMPTBL (L(fwd_write_74bytes), L(table_144_bytes_fwd))
3073 .int JMPTBL (L(fwd_write_75bytes), L(table_144_bytes_fwd))
3074 .int JMPTBL (L(fwd_write_76bytes), L(table_144_bytes_fwd))
3075 .int JMPTBL (L(fwd_write_77bytes), L(table_144_bytes_fwd))
3076 .int JMPTBL (L(fwd_write_78bytes), L(table_144_bytes_fwd))
3077 .int JMPTBL (L(fwd_write_79bytes), L(table_144_bytes_fwd))
3078 .int JMPTBL (L(fwd_write_80bytes), L(table_144_bytes_fwd))
3079 .int JMPTBL (L(fwd_write_81bytes), L(table_144_bytes_fwd))
3080 .int JMPTBL (L(fwd_write_82bytes), L(table_144_bytes_fwd))
3081 .int JMPTBL (L(fwd_write_83bytes), L(table_144_bytes_fwd))
3082 .int JMPTBL (L(fwd_write_84bytes), L(table_144_bytes_fwd))
3083 .int JMPTBL (L(fwd_write_85bytes), L(table_144_bytes_fwd))
3084 .int JMPTBL (L(fwd_write_86bytes), L(table_144_bytes_fwd))
3085 .int JMPTBL (L(fwd_write_87bytes), L(table_144_bytes_fwd))
3086 .int JMPTBL (L(fwd_write_88bytes), L(table_144_bytes_fwd))
3087 .int JMPTBL (L(fwd_write_89bytes), L(table_144_bytes_fwd))
3088 .int JMPTBL (L(fwd_write_90bytes), L(table_144_bytes_fwd))
3089 .int JMPTBL (L(fwd_write_91bytes), L(table_144_bytes_fwd))
3090 .int JMPTBL (L(fwd_write_92bytes), L(table_144_bytes_fwd))
3091 .int JMPTBL (L(fwd_write_93bytes), L(table_144_bytes_fwd))
3092 .int JMPTBL (L(fwd_write_94bytes), L(table_144_bytes_fwd))
3093 .int JMPTBL (L(fwd_write_95bytes), L(table_144_bytes_fwd))
3094 .int JMPTBL (L(fwd_write_96bytes), L(table_144_bytes_fwd))
3095 .int JMPTBL (L(fwd_write_97bytes), L(table_144_bytes_fwd))
3096 .int JMPTBL (L(fwd_write_98bytes), L(table_144_bytes_fwd))
3097 .int JMPTBL (L(fwd_write_99bytes), L(table_144_bytes_fwd))
3098 .int JMPTBL (L(fwd_write_100bytes), L(table_144_bytes_fwd))
3099 .int JMPTBL (L(fwd_write_101bytes), L(table_144_bytes_fwd))
3100 .int JMPTBL (L(fwd_write_102bytes), L(table_144_bytes_fwd))
3101 .int JMPTBL (L(fwd_write_103bytes), L(table_144_bytes_fwd))
3102 .int JMPTBL (L(fwd_write_104bytes), L(table_144_bytes_fwd))
3103 .int JMPTBL (L(fwd_write_105bytes), L(table_144_bytes_fwd))
3104 .int JMPTBL (L(fwd_write_106bytes), L(table_144_bytes_fwd))
3105 .int JMPTBL (L(fwd_write_107bytes), L(table_144_bytes_fwd))
3106 .int JMPTBL (L(fwd_write_108bytes), L(table_144_bytes_fwd))
3107 .int JMPTBL (L(fwd_write_109bytes), L(table_144_bytes_fwd))
3108 .int JMPTBL (L(fwd_write_110bytes), L(table_144_bytes_fwd))
3109 .int JMPTBL (L(fwd_write_111bytes), L(table_144_bytes_fwd))
3110 .int JMPTBL (L(fwd_write_112bytes), L(table_144_bytes_fwd))
3111 .int JMPTBL (L(fwd_write_113bytes), L(table_144_bytes_fwd))
3112 .int JMPTBL (L(fwd_write_114bytes), L(table_144_bytes_fwd))
3113 .int JMPTBL (L(fwd_write_115bytes), L(table_144_bytes_fwd))
3114 .int JMPTBL (L(fwd_write_116bytes), L(table_144_bytes_fwd))
3115 .int JMPTBL (L(fwd_write_117bytes), L(table_144_bytes_fwd))
3116 .int JMPTBL (L(fwd_write_118bytes), L(table_144_bytes_fwd))
3117 .int JMPTBL (L(fwd_write_119bytes), L(table_144_bytes_fwd))
3118 .int JMPTBL (L(fwd_write_120bytes), L(table_144_bytes_fwd))
3119 .int JMPTBL (L(fwd_write_121bytes), L(table_144_bytes_fwd))
3120 .int JMPTBL (L(fwd_write_122bytes), L(table_144_bytes_fwd))
3121 .int JMPTBL (L(fwd_write_123bytes), L(table_144_bytes_fwd))
3122 .int JMPTBL (L(fwd_write_124bytes), L(table_144_bytes_fwd))
3123 .int JMPTBL (L(fwd_write_125bytes), L(table_144_bytes_fwd))
3124 .int JMPTBL (L(fwd_write_126bytes), L(table_144_bytes_fwd))
3125 .int JMPTBL (L(fwd_write_127bytes), L(table_144_bytes_fwd))
3126 .int JMPTBL (L(fwd_write_128bytes), L(table_144_bytes_fwd))
3127 .int JMPTBL (L(fwd_write_129bytes), L(table_144_bytes_fwd))
3128 .int JMPTBL (L(fwd_write_130bytes), L(table_144_bytes_fwd))
3129 .int JMPTBL (L(fwd_write_131bytes), L(table_144_bytes_fwd))
3130 .int JMPTBL (L(fwd_write_132bytes), L(table_144_bytes_fwd))
3131 .int JMPTBL (L(fwd_write_133bytes), L(table_144_bytes_fwd))
3132 .int JMPTBL (L(fwd_write_134bytes), L(table_144_bytes_fwd))
3133 .int JMPTBL (L(fwd_write_135bytes), L(table_144_bytes_fwd))
3134 .int JMPTBL (L(fwd_write_136bytes), L(table_144_bytes_fwd))
3135 .int JMPTBL (L(fwd_write_137bytes), L(table_144_bytes_fwd))
3136 .int JMPTBL (L(fwd_write_138bytes), L(table_144_bytes_fwd))
3137 .int JMPTBL (L(fwd_write_139bytes), L(table_144_bytes_fwd))
3138 .int JMPTBL (L(fwd_write_140bytes), L(table_144_bytes_fwd))
3139 .int JMPTBL (L(fwd_write_141bytes), L(table_144_bytes_fwd))
3140 .int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
3141 .int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
3142
3143 .p2align 3
/* Jump table with 16 entries.  Entry K (K = 0 .. 15) is the 32-bit value
   L(shl_K) minus the address of this table (see JMPTBL).
   NOTE(review): K is presumably a 0..15 byte misalignment selecting the
   forward shifted-copy loop; confirm against the dispatch site, which is
   outside this part of the file.  */
3144L(shl_table_fwd):
3145 .int JMPTBL (L(shl_0), L(shl_table_fwd))
3146 .int JMPTBL (L(shl_1), L(shl_table_fwd))
3147 .int JMPTBL (L(shl_2), L(shl_table_fwd))
3148 .int JMPTBL (L(shl_3), L(shl_table_fwd))
3149 .int JMPTBL (L(shl_4), L(shl_table_fwd))
3150 .int JMPTBL (L(shl_5), L(shl_table_fwd))
3151 .int JMPTBL (L(shl_6), L(shl_table_fwd))
3152 .int JMPTBL (L(shl_7), L(shl_table_fwd))
3153 .int JMPTBL (L(shl_8), L(shl_table_fwd))
3154 .int JMPTBL (L(shl_9), L(shl_table_fwd))
3155 .int JMPTBL (L(shl_10), L(shl_table_fwd))
3156 .int JMPTBL (L(shl_11), L(shl_table_fwd))
3157 .int JMPTBL (L(shl_12), L(shl_table_fwd))
3158 .int JMPTBL (L(shl_13), L(shl_table_fwd))
3159 .int JMPTBL (L(shl_14), L(shl_table_fwd))
3160 .int JMPTBL (L(shl_15), L(shl_table_fwd))
3161
3162 .p2align 3
/* Jump table with 16 entries.  Entry K (K = 0 .. 15) is the 32-bit value
   L(shl_K_bwd) minus the address of this table (see JMPTBL).
   NOTE(review): K is presumably a 0..15 byte misalignment selecting the
   backward shifted-copy loop; confirm against the dispatch site, which
   is outside this part of the file.  */
3163L(shl_table_bwd):
3164 .int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
3165 .int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
3166 .int JMPTBL (L(shl_2_bwd), L(shl_table_bwd))
3167 .int JMPTBL (L(shl_3_bwd), L(shl_table_bwd))
3168 .int JMPTBL (L(shl_4_bwd), L(shl_table_bwd))
3169 .int JMPTBL (L(shl_5_bwd), L(shl_table_bwd))
3170 .int JMPTBL (L(shl_6_bwd), L(shl_table_bwd))
3171 .int JMPTBL (L(shl_7_bwd), L(shl_table_bwd))
3172 .int JMPTBL (L(shl_8_bwd), L(shl_table_bwd))
3173 .int JMPTBL (L(shl_9_bwd), L(shl_table_bwd))
3174 .int JMPTBL (L(shl_10_bwd), L(shl_table_bwd))
3175 .int JMPTBL (L(shl_11_bwd), L(shl_table_bwd))
3176 .int JMPTBL (L(shl_12_bwd), L(shl_table_bwd))
3177 .int JMPTBL (L(shl_13_bwd), L(shl_table_bwd))
3178 .int JMPTBL (L(shl_14_bwd), L(shl_table_bwd))
3179 .int JMPTBL (L(shl_15_bwd), L(shl_table_bwd))
3180
3181#endif
3182

source code of glibc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S