1/* strcpy with SSSE3
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#if IS_IN (libc)
20
21# ifndef USE_AS_STRCAT
22# include <sysdep.h>
23
24# ifndef STRCPY
25# define STRCPY __strcpy_ssse3
26# endif
27
28 .section .text.ssse3,"ax",@progbits
/* Entry point.  The source pointer (%rsi) is kept in %rcx and the
   destination pointer (%rdi) in %rdx; every load below goes through
   %rcx.  With USE_AS_STRNCPY the value in %RDX_LP is saved in %R8_LP
   before %rdx is overwritten and serves as the byte limit.
   This part probes the first 16 source bytes one at a time: a NUL at
   offset k branches to L(Exit<k+1>).  The L(Exit*) and L(StrncpyExit*)
   targets are defined outside this section.  */
29ENTRY (STRCPY)
30
31 mov %rsi, %rcx
32# ifdef USE_AS_STRNCPY
33 mov %RDX_LP, %R8_LP
34# endif
35 mov %rdi, %rdx
36# ifdef USE_AS_STRNCPY
/* Limit == 0 leaves through L(Exit0); limit <= 8 (unsigned) is handed to
   L(StrncpyExit8Bytes) before any source byte is read here.  */
37 test %R8_LP, %R8_LP
38 jz L(Exit0)
39 cmp $8, %R8_LP
40 jbe L(StrncpyExit8Bytes)
41# endif
/* Source bytes 0..7.  */
42 cmpb $0, (%rcx)
43 jz L(Exit1)
44 cmpb $0, 1(%rcx)
45 jz L(Exit2)
46 cmpb $0, 2(%rcx)
47 jz L(Exit3)
48 cmpb $0, 3(%rcx)
49 jz L(Exit4)
50 cmpb $0, 4(%rcx)
51 jz L(Exit5)
52 cmpb $0, 5(%rcx)
53 jz L(Exit6)
54 cmpb $0, 6(%rcx)
55 jz L(Exit7)
56 cmpb $0, 7(%rcx)
57 jz L(Exit8)
58# ifdef USE_AS_STRNCPY
/* Limit below 16 (unsigned): do not probe past byte 7 here.  */
59 cmp $16, %r8
60 jb L(StrncpyExit15Bytes)
61# endif
/* Source bytes 8..14.  */
62 cmpb $0, 8(%rcx)
63 jz L(Exit9)
64 cmpb $0, 9(%rcx)
65 jz L(Exit10)
66 cmpb $0, 10(%rcx)
67 jz L(Exit11)
68 cmpb $0, 11(%rcx)
69 jz L(Exit12)
70 cmpb $0, 12(%rcx)
71 jz L(Exit13)
72 cmpb $0, 13(%rcx)
73 jz L(Exit14)
74 cmpb $0, 14(%rcx)
75 jz L(Exit15)
76# ifdef USE_AS_STRNCPY
/* Limit exactly 16: go to L(Exit16) without inspecting byte 15.  */
77 cmp $16, %r8
78 je L(Exit16)
79# endif
80 cmpb $0, 15(%rcx)
81 jz L(Exit16)
82# endif
83
/* Alignment set-up.  Copies the first 16 source bytes with two 8-byte
   moves through %r9, tests the 16-byte-aligned block that follows %rcx
   for a NUL, and then advances both pointers so that %rdx is 16-byte
   aligned.  On exit %rax = %rcx & 0xf (source misalignment relative to
   the now aligned destination) and %rsi = 0.  */
84# ifdef USE_AS_STRNCPY
85 mov %rcx, %rsi
86 sub $16, %r8
87 and $0xf, %rsi
88
89/* r8 = r8 - 16 + rcx_offset, where rcx_offset = rcx & 0xf */
90
91 add %rsi, %r8
92# endif
/* %rsi = first 16-byte boundary strictly above %rcx.  */
93 lea 16(%rcx), %rsi
94 and $-16, %rsi
/* %xmm0 = 0; pcmpeqb against it yields 0xff in each byte position that
   holds a NUL.  */
95 pxor %xmm0, %xmm0
96 mov (%rcx), %r9
97 mov %r9, (%rdx)
98 pcmpeqb (%rsi), %xmm0
99 mov 8(%rcx), %r9
100 mov %r9, 8(%rdx)
101
102/* convert byte mask in xmm0 to bit mask */
103
104 pmovmskb %xmm0, %rax
/* %rsi = distance from %rcx to the aligned block just tested (1..16).  */
105 sub %rcx, %rsi
106
107# ifdef USE_AS_STRNCPY
108 sub $16, %r8
109 jbe L(CopyFrom1To16BytesCase2OrCase3)
110# endif
111 test %rax, %rax
112 jnz L(CopyFrom1To16Bytes)
113
/* Round %rdx up to the next 16-byte boundary; %rax = old %rdx - new %rdx,
   i.e. minus the number of bytes skipped (-16..-1).  */
114 mov %rdx, %rax
115 lea 16(%rdx), %rdx
116 and $-16, %rdx
117 sub %rdx, %rax
118
119# ifdef USE_AS_STRNCPY
/* Keeps only bit 31 of (%rsi + %rax - 1).  If it is set (the sum is
   negative as a 32-bit value) %r8 is left alone, otherwise 16 is added
   back to %r8.  */
120 add %rax, %rsi
121 lea -1(%rsi), %rsi
122 and $1<<31, %esi
123 test %rsi, %rsi
124 jnz L(ContinueCopy)
125 lea 16(%r8), %r8
126
127L(ContinueCopy):
128# endif
/* %rax is negative, so this advances %rcx by the same number of bytes
   that %rdx was advanced above.  */
129 sub %rax, %rcx
130 mov %rcx, %rax
131 and $0xf, %rax
/* mov (not xor) is used to clear %rsi because it leaves the flags set by
   the preceding `and' intact for the jz below.  */
132 mov $0, %rsi
133
134/* case: rcx_offset == rdx_offset */
135
136 jz L(Align16Both)
137
/* Dispatch on %rax = %rcx & 0xf (1..15 here, the zero case has already
   branched to L(Align16Both)): jump to the L(ShlN) variant whose palignr
   shift equals the source misalignment.  */
138 cmp $8, %rax
139 jae L(ShlHigh8)
140 cmp $1, %rax
141 je L(Shl1)
142 cmp $2, %rax
143 je L(Shl2)
144 cmp $3, %rax
145 je L(Shl3)
146 cmp $4, %rax
147 je L(Shl4)
148 cmp $5, %rax
149 je L(Shl5)
150 cmp $6, %rax
151 je L(Shl6)
152 jmp L(Shl7)
153
154L(ShlHigh8):
/* Flags here still come from `cmp $8, %rax' above (the jae that led here
   does not modify them), so this je tests %rax == 8.  */
155 je L(Shl8)
156 cmp $9, %rax
157 je L(Shl9)
158 cmp $10, %rax
159 je L(Shl10)
160 cmp $11, %rax
161 je L(Shl11)
162 cmp $12, %rax
163 je L(Shl12)
164 cmp $13, %rax
165 je L(Shl13)
166 cmp $14, %rax
167 je L(Shl14)
168 jmp L(Shl15)
169
/* L(Align16Both): %rcx and %rdx are both 16-byte aligned, so every load
   and store is an aligned movaps.  Six unrolled steps follow; each loads
   the next 16-byte block, stores the block loaded by the previous step,
   and tests the new block for a NUL.  %rsi (0 on entry) is the running
   byte offset applied to both %rcx and %rdx.  %xmm0 keeps serving as the
   zero vector: whenever the pmovmskb mask is 0 the pcmpeqb result in
   %xmm0 is itself all-zero.  A non-zero mask leaves through
   L(CopyFrom1To16Bytes) (outside this section) with the mask in %rax.  */
170L(Align16Both):
171 movaps (%rcx), %xmm1
172 movaps 16(%rcx), %xmm2
173 movaps %xmm1, (%rdx)
174 pcmpeqb %xmm2, %xmm0
175 pmovmskb %xmm0, %rax
176 lea 16(%rsi), %rsi
177# ifdef USE_AS_STRNCPY
178 sub $16, %r8
179 jbe L(CopyFrom1To16BytesCase2OrCase3)
180# endif
181 test %rax, %rax
182 jnz L(CopyFrom1To16Bytes)
183
184 movaps 16(%rcx, %rsi), %xmm3
185 movaps %xmm2, (%rdx, %rsi)
186 pcmpeqb %xmm3, %xmm0
187 pmovmskb %xmm0, %rax
188 lea 16(%rsi), %rsi
189# ifdef USE_AS_STRNCPY
190 sub $16, %r8
191 jbe L(CopyFrom1To16BytesCase2OrCase3)
192# endif
193 test %rax, %rax
194 jnz L(CopyFrom1To16Bytes)
195
196 movaps 16(%rcx, %rsi), %xmm4
197 movaps %xmm3, (%rdx, %rsi)
198 pcmpeqb %xmm4, %xmm0
199 pmovmskb %xmm0, %rax
200 lea 16(%rsi), %rsi
201# ifdef USE_AS_STRNCPY
202 sub $16, %r8
203 jbe L(CopyFrom1To16BytesCase2OrCase3)
204# endif
205 test %rax, %rax
206 jnz L(CopyFrom1To16Bytes)
207
208 movaps 16(%rcx, %rsi), %xmm1
209 movaps %xmm4, (%rdx, %rsi)
210 pcmpeqb %xmm1, %xmm0
211 pmovmskb %xmm0, %rax
212 lea 16(%rsi), %rsi
213# ifdef USE_AS_STRNCPY
214 sub $16, %r8
215 jbe L(CopyFrom1To16BytesCase2OrCase3)
216# endif
217 test %rax, %rax
218 jnz L(CopyFrom1To16Bytes)
219
220 movaps 16(%rcx, %rsi), %xmm2
221 movaps %xmm1, (%rdx, %rsi)
222 pcmpeqb %xmm2, %xmm0
223 pmovmskb %xmm0, %rax
224 lea 16(%rsi), %rsi
225# ifdef USE_AS_STRNCPY
226 sub $16, %r8
227 jbe L(CopyFrom1To16BytesCase2OrCase3)
228# endif
229 test %rax, %rax
230 jnz L(CopyFrom1To16Bytes)
231
232 movaps 16(%rcx, %rsi), %xmm3
233 movaps %xmm2, (%rdx, %rsi)
234 pcmpeqb %xmm3, %xmm0
235 pmovmskb %xmm0, %rax
236 lea 16(%rsi), %rsi
237# ifdef USE_AS_STRNCPY
238 sub $16, %r8
239 jbe L(CopyFrom1To16BytesCase2OrCase3)
240# endif
241 test %rax, %rax
242 jnz L(CopyFrom1To16Bytes)
243
/* Store the last checked block, then prepare the 64-byte loop: %rcx is
   set to (%rcx + %rsi + 16) rounded down to a 64-byte boundary,
   %rax = old %rcx - new %rcx, and %rdx is moved by the same distance so
   source and destination stay in step.  %rsi is reset to -0x40.  */
244 movaps %xmm3, (%rdx, %rsi)
245 mov %rcx, %rax
246 lea 16(%rcx, %rsi), %rcx
247 and $-0x40, %rcx
248 sub %rcx, %rax
249 sub %rax, %rdx
250# ifdef USE_AS_STRNCPY
251 lea 112(%r8, %rax), %r8
252# endif
253 mov $-0x40, %rsi
254
/* L(Aligned64Loop): 64 bytes per iteration with aligned loads and stores.
   The four 16-byte blocks are kept unmodified in %xmm4, %xmm5, %xmm6 and
   %xmm7; copies are folded with pminub (byte-wise unsigned minimum) so
   that one pcmpeqb against the zero vector in %xmm0 detects a NUL
   anywhere in the 64 bytes.  %rcx and %rdx are advanced before the test,
   hence the negative store offsets.  Nothing is stored when a NUL is
   found; L(Aligned64Leave) handles that case.  */
255 .p2align 4
256L(Aligned64Loop):
257 movaps (%rcx), %xmm2
258 movaps %xmm2, %xmm4
259 movaps 16(%rcx), %xmm5
260 movaps 32(%rcx), %xmm3
261 movaps %xmm3, %xmm6
262 movaps 48(%rcx), %xmm7
263 pminub %xmm5, %xmm2
264 pminub %xmm7, %xmm3
265 pminub %xmm2, %xmm3
266 pcmpeqb %xmm0, %xmm3
267 pmovmskb %xmm3, %rax
268 lea 64(%rdx), %rdx
269 lea 64(%rcx), %rcx
270# ifdef USE_AS_STRNCPY
271 sub $64, %r8
272 jbe L(StrncpyLeaveCase2OrCase3)
273# endif
274 test %rax, %rax
275 jnz L(Aligned64Leave)
276 movaps %xmm4, -64(%rdx)
277 movaps %xmm5, -48(%rdx)
278 movaps %xmm6, -32(%rdx)
279 movaps %xmm7, -16(%rdx)
280 jmp L(Aligned64Loop)
281
/* L(Aligned64Leave): the 64 bytes held in %xmm4..%xmm7 contain a NUL.
   The blocks are tested in order; each block found NUL-free is stored at
   its slot below the already advanced %rdx and %rsi is stepped by 16.
   The first block with a NUL leaves through L(CopyFrom1To16Bytes)
   (outside this section) with its mask in %rax.  With USE_AS_STRNCPY,
   %r8 is first raised by 48 and then lowered by 16 per block passed.  */
282L(Aligned64Leave):
283# ifdef USE_AS_STRNCPY
284 lea 48(%r8), %r8
285# endif
286 pcmpeqb %xmm4, %xmm0
287 pmovmskb %xmm0, %rax
288 test %rax, %rax
289 jnz L(CopyFrom1To16Bytes)
290
291 pcmpeqb %xmm5, %xmm0
292# ifdef USE_AS_STRNCPY
293 lea -16(%r8), %r8
294# endif
295 pmovmskb %xmm0, %rax
296 movaps %xmm4, -64(%rdx)
297 test %rax, %rax
/* lea does not modify flags, so the jnz still sees the result of test.  */
298 lea 16(%rsi), %rsi
299 jnz L(CopyFrom1To16Bytes)
300
301 pcmpeqb %xmm6, %xmm0
302# ifdef USE_AS_STRNCPY
303 lea -16(%r8), %r8
304# endif
305 pmovmskb %xmm0, %rax
306 movaps %xmm5, -48(%rdx)
307 test %rax, %rax
308 lea 16(%rsi), %rsi
309 jnz L(CopyFrom1To16Bytes)
310
/* The first three blocks were clean, so the NUL is in %xmm7 and the jump
   is unconditional.  */
311 movaps %xmm6, -32(%rdx)
312 pcmpeqb %xmm7, %xmm0
313# ifdef USE_AS_STRNCPY
314 lea -16(%r8), %r8
315# endif
316 pmovmskb %xmm0, %rax
317 lea 16(%rsi), %rsi
318 jmp L(CopyFrom1To16Bytes)
319
/* L(Shl1): the source is 1 byte past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 holds the aligned block containing the byte at
   %rcx and %xmm2 the aligned block after it; `palignr $1' merges the pair
   into the 16 source bytes starting at %rcx, which are stored with an
   aligned movaps.  Each of the four unrolled steps first tests the newly
   loaded block for a NUL (%xmm0 is all-zero before every pcmpeqb) and
   leaves via L(Shl1LoopExit) if one is present.  %xmm1 and %xmm3
   alternate as the holder of the previous unshifted block.
   L(Shl1Start) is also re-entered from L(Shl1LoopStart).  */
320 .p2align 4
321L(Shl1):
322 movaps -1(%rcx), %xmm1
323 movaps 15(%rcx), %xmm2
324L(Shl1Start):
325 pcmpeqb %xmm2, %xmm0
326 pmovmskb %xmm0, %rax
327 movaps %xmm2, %xmm3
328# ifdef USE_AS_STRNCPY
329 sub $16, %r8
330 jbe L(StrncpyExit1Case2OrCase3)
331# endif
332 test %rax, %rax
333 jnz L(Shl1LoopExit)
334
335 palignr $1, %xmm1, %xmm2
336 movaps %xmm2, (%rdx)
337 movaps 31(%rcx), %xmm2
338
339 pcmpeqb %xmm2, %xmm0
340 lea 16(%rdx), %rdx
341 pmovmskb %xmm0, %rax
342 lea 16(%rcx), %rcx
343 movaps %xmm2, %xmm1
344# ifdef USE_AS_STRNCPY
345 sub $16, %r8
346 jbe L(StrncpyExit1Case2OrCase3)
347# endif
348 test %rax, %rax
349 jnz L(Shl1LoopExit)
350
351 palignr $1, %xmm3, %xmm2
352 movaps %xmm2, (%rdx)
353 movaps 31(%rcx), %xmm2
354
355 pcmpeqb %xmm2, %xmm0
356 lea 16(%rdx), %rdx
357 pmovmskb %xmm0, %rax
358 lea 16(%rcx), %rcx
359 movaps %xmm2, %xmm3
360# ifdef USE_AS_STRNCPY
361 sub $16, %r8
362 jbe L(StrncpyExit1Case2OrCase3)
363# endif
364 test %rax, %rax
365 jnz L(Shl1LoopExit)
366
367 palignr $1, %xmm1, %xmm2
368 movaps %xmm2, (%rdx)
369 movaps 31(%rcx), %xmm2
370
371 pcmpeqb %xmm2, %xmm0
372 lea 16(%rdx), %rdx
373 pmovmskb %xmm0, %rax
374 lea 16(%rcx), %rcx
375# ifdef USE_AS_STRNCPY
376 sub $16, %r8
377 jbe L(StrncpyExit1Case2OrCase3)
378# endif
379 test %rax, %rax
380 jnz L(Shl1LoopExit)
381
382 palignr $1, %xmm3, %xmm2
383 movaps %xmm2, (%rdx)
384 lea 31(%rcx), %rcx
385 lea 16(%rdx), %rdx
386
/* Round the aligned load address down to a 64-byte boundary for the
   64-byte loop.  %rax = number of bytes stepped back; %rdx is moved back
   by the same amount and (strncpy) %r8 is credited with it.  %xmm1 is
   reloaded with the aligned block preceding the new position.  */
387 mov %rcx, %rax
388 and $-0x40, %rcx
389 sub %rcx, %rax
390 lea -15(%rcx), %rcx
391 sub %rax, %rdx
392# ifdef USE_AS_STRNCPY
393 add %rax, %r8
394# endif
395 movaps -1(%rcx), %xmm1
396
397/* 64 bytes loop */
/* Loads four aligned blocks into %xmm2..%xmm5 and folds copies with
   pminub so one pcmpeqb against the zero vector in %xmm0 detects a NUL
   anywhere in the 64 bytes.  If a NUL is found, control returns to
   L(Shl1Start) with %xmm1/%xmm2 unmodified and %rcx/%rdx not advanced,
   so the 16-byte steps locate it.  Otherwise the four palignr-merged
   blocks are stored and %xmm1 takes the unshifted copy of the last block
   (saved in %xmm7) for the next iteration.  */
398 .p2align 4
399L(Shl1LoopStart):
400 movaps 15(%rcx), %xmm2
401 movaps 31(%rcx), %xmm3
402 movaps %xmm3, %xmm6
403 movaps 47(%rcx), %xmm4
404 movaps %xmm4, %xmm7
405 movaps 63(%rcx), %xmm5
406 pminub %xmm2, %xmm6
407 pminub %xmm5, %xmm7
408 pminub %xmm6, %xmm7
409 pcmpeqb %xmm0, %xmm7
410 pmovmskb %xmm7, %rax
411 movaps %xmm5, %xmm7
412 palignr $1, %xmm4, %xmm5
413 test %rax, %rax
414 palignr $1, %xmm3, %xmm4
415 jnz L(Shl1Start)
416# ifdef USE_AS_STRNCPY
417 sub $64, %r8
418 jbe L(StrncpyLeave1)
419# endif
420 palignr $1, %xmm2, %xmm3
421 lea 64(%rcx), %rcx
422 palignr $1, %xmm1, %xmm2
423 movaps %xmm7, %xmm1
424 movaps %xmm5, 48(%rdx)
425 movaps %xmm4, 32(%rdx)
426 movaps %xmm3, 16(%rdx)
427 movaps %xmm2, (%rdx)
428 lea 64(%rdx), %rdx
429 jmp L(Shl1LoopStart)
430
/* A NUL was seen in the aligned block at 15(%rcx).  Copy the 16 bytes at
   -1(%rcx) (they end just before that block) with unaligned moves, set
   %rsi = 15, and continue at L(CopyFrom1To16Bytes), which is outside this
   section (presumably it treats %rsi as the offset of the block holding
   the NUL -- confirm).  */
431L(Shl1LoopExit):
432 movdqu -1(%rcx), %xmm1
433 mov $15, %rsi
434 movdqu %xmm1, -1(%rdx)
435 jmp L(CopyFrom1To16Bytes)
436
/* L(Shl2): the source is 2 bytes past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 = aligned block containing the byte at %rcx,
   %xmm2 = the following aligned block; `palignr $2' merges them into the
   16 source bytes starting at %rcx for an aligned store.  Each of the
   four unrolled steps tests the newly loaded block for a NUL first
   (%xmm0 is all-zero before every pcmpeqb) and leaves via
   L(Shl2LoopExit) if one is present.  L(Shl2Start) is also re-entered
   from L(Shl2LoopStart).  */
437 .p2align 4
438L(Shl2):
439 movaps -2(%rcx), %xmm1
440 movaps 14(%rcx), %xmm2
441L(Shl2Start):
442 pcmpeqb %xmm2, %xmm0
443 pmovmskb %xmm0, %rax
444 movaps %xmm2, %xmm3
445# ifdef USE_AS_STRNCPY
446 sub $16, %r8
447 jbe L(StrncpyExit2Case2OrCase3)
448# endif
449 test %rax, %rax
450 jnz L(Shl2LoopExit)
451
452 palignr $2, %xmm1, %xmm2
453 movaps %xmm2, (%rdx)
454 movaps 30(%rcx), %xmm2
455
456 pcmpeqb %xmm2, %xmm0
457 lea 16(%rdx), %rdx
458 pmovmskb %xmm0, %rax
459 lea 16(%rcx), %rcx
460 movaps %xmm2, %xmm1
461# ifdef USE_AS_STRNCPY
462 sub $16, %r8
463 jbe L(StrncpyExit2Case2OrCase3)
464# endif
465 test %rax, %rax
466 jnz L(Shl2LoopExit)
467
468 palignr $2, %xmm3, %xmm2
469 movaps %xmm2, (%rdx)
470 movaps 30(%rcx), %xmm2
471
472 pcmpeqb %xmm2, %xmm0
473 lea 16(%rdx), %rdx
474 pmovmskb %xmm0, %rax
475 lea 16(%rcx), %rcx
476 movaps %xmm2, %xmm3
477# ifdef USE_AS_STRNCPY
478 sub $16, %r8
479 jbe L(StrncpyExit2Case2OrCase3)
480# endif
481 test %rax, %rax
482 jnz L(Shl2LoopExit)
483
484 palignr $2, %xmm1, %xmm2
485 movaps %xmm2, (%rdx)
486 movaps 30(%rcx), %xmm2
487
488 pcmpeqb %xmm2, %xmm0
489 lea 16(%rdx), %rdx
490 pmovmskb %xmm0, %rax
491 lea 16(%rcx), %rcx
492# ifdef USE_AS_STRNCPY
493 sub $16, %r8
494 jbe L(StrncpyExit2Case2OrCase3)
495# endif
496 test %rax, %rax
497 jnz L(Shl2LoopExit)
498
499 palignr $2, %xmm3, %xmm2
500 movaps %xmm2, (%rdx)
501 lea 30(%rcx), %rcx
502 lea 16(%rdx), %rdx
503
/* Round the aligned load address down to a 64-byte boundary; %rax is the
   number of bytes stepped back, applied to %rdx as well and (strncpy)
   added back to %r8.  */
504 mov %rcx, %rax
505 and $-0x40, %rcx
506 sub %rcx, %rax
507 lea -14(%rcx), %rcx
508 sub %rax, %rdx
509# ifdef USE_AS_STRNCPY
510 add %rax, %r8
511# endif
512 movaps -2(%rcx), %xmm1
513
514/* 64 bytes loop */
/* Four aligned loads into %xmm2..%xmm5, folded with pminub so one
   pcmpeqb against the zero vector in %xmm0 detects a NUL in any of the
   64 bytes.  On a hit, control returns to L(Shl2Start) with %xmm1/%xmm2
   unmodified and %rcx/%rdx not advanced.  Otherwise the palignr-merged
   blocks are stored and %xmm1 takes the unshifted last block (saved in
   %xmm7).  */
515 .p2align 4
516L(Shl2LoopStart):
517 movaps 14(%rcx), %xmm2
518 movaps 30(%rcx), %xmm3
519 movaps %xmm3, %xmm6
520 movaps 46(%rcx), %xmm4
521 movaps %xmm4, %xmm7
522 movaps 62(%rcx), %xmm5
523 pminub %xmm2, %xmm6
524 pminub %xmm5, %xmm7
525 pminub %xmm6, %xmm7
526 pcmpeqb %xmm0, %xmm7
527 pmovmskb %xmm7, %rax
528 movaps %xmm5, %xmm7
529 palignr $2, %xmm4, %xmm5
530 test %rax, %rax
531 palignr $2, %xmm3, %xmm4
532 jnz L(Shl2Start)
533# ifdef USE_AS_STRNCPY
534 sub $64, %r8
535 jbe L(StrncpyLeave2)
536# endif
537 palignr $2, %xmm2, %xmm3
538 lea 64(%rcx), %rcx
539 palignr $2, %xmm1, %xmm2
540 movaps %xmm7, %xmm1
541 movaps %xmm5, 48(%rdx)
542 movaps %xmm4, 32(%rdx)
543 movaps %xmm3, 16(%rdx)
544 movaps %xmm2, (%rdx)
545 lea 64(%rdx), %rdx
546 jmp L(Shl2LoopStart)
547
/* A NUL was seen in the aligned block at 14(%rcx).  Copy the 16 bytes at
   -2(%rcx) with unaligned moves, set %rsi = 14, and continue at
   L(CopyFrom1To16Bytes) (outside this section).  */
548L(Shl2LoopExit):
549 movdqu -2(%rcx), %xmm1
550 mov $14, %rsi
551 movdqu %xmm1, -2(%rdx)
552 jmp L(CopyFrom1To16Bytes)
553
/* L(Shl3): the source is 3 bytes past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 = aligned block containing the byte at %rcx,
   %xmm2 = the following aligned block; `palignr $3' merges them into the
   16 source bytes starting at %rcx for an aligned store.  Each of the
   four unrolled steps tests the newly loaded block for a NUL first
   (%xmm0 is all-zero before every pcmpeqb) and leaves via
   L(Shl3LoopExit) if one is present.  L(Shl3Start) is also re-entered
   from L(Shl3LoopStart).  */
554 .p2align 4
555L(Shl3):
556 movaps -3(%rcx), %xmm1
557 movaps 13(%rcx), %xmm2
558L(Shl3Start):
559 pcmpeqb %xmm2, %xmm0
560 pmovmskb %xmm0, %rax
561 movaps %xmm2, %xmm3
562# ifdef USE_AS_STRNCPY
563 sub $16, %r8
564 jbe L(StrncpyExit3Case2OrCase3)
565# endif
566 test %rax, %rax
567 jnz L(Shl3LoopExit)
568
569 palignr $3, %xmm1, %xmm2
570 movaps %xmm2, (%rdx)
571 movaps 29(%rcx), %xmm2
572
573 pcmpeqb %xmm2, %xmm0
574 lea 16(%rdx), %rdx
575 pmovmskb %xmm0, %rax
576 lea 16(%rcx), %rcx
577 movaps %xmm2, %xmm1
578# ifdef USE_AS_STRNCPY
579 sub $16, %r8
580 jbe L(StrncpyExit3Case2OrCase3)
581# endif
582 test %rax, %rax
583 jnz L(Shl3LoopExit)
584
585 palignr $3, %xmm3, %xmm2
586 movaps %xmm2, (%rdx)
587 movaps 29(%rcx), %xmm2
588
589 pcmpeqb %xmm2, %xmm0
590 lea 16(%rdx), %rdx
591 pmovmskb %xmm0, %rax
592 lea 16(%rcx), %rcx
593 movaps %xmm2, %xmm3
594# ifdef USE_AS_STRNCPY
595 sub $16, %r8
596 jbe L(StrncpyExit3Case2OrCase3)
597# endif
598 test %rax, %rax
599 jnz L(Shl3LoopExit)
600
601 palignr $3, %xmm1, %xmm2
602 movaps %xmm2, (%rdx)
603 movaps 29(%rcx), %xmm2
604
605 pcmpeqb %xmm2, %xmm0
606 lea 16(%rdx), %rdx
607 pmovmskb %xmm0, %rax
608 lea 16(%rcx), %rcx
609# ifdef USE_AS_STRNCPY
610 sub $16, %r8
611 jbe L(StrncpyExit3Case2OrCase3)
612# endif
613 test %rax, %rax
614 jnz L(Shl3LoopExit)
615
616 palignr $3, %xmm3, %xmm2
617 movaps %xmm2, (%rdx)
618 lea 29(%rcx), %rcx
619 lea 16(%rdx), %rdx
620
/* Round the aligned load address down to a 64-byte boundary; %rax is the
   number of bytes stepped back, applied to %rdx as well and (strncpy)
   added back to %r8.  */
621 mov %rcx, %rax
622 and $-0x40, %rcx
623 sub %rcx, %rax
624 lea -13(%rcx), %rcx
625 sub %rax, %rdx
626# ifdef USE_AS_STRNCPY
627 add %rax, %r8
628# endif
629 movaps -3(%rcx), %xmm1
630
631/* 64 bytes loop */
/* Four aligned loads into %xmm2..%xmm5, folded with pminub so one
   pcmpeqb against the zero vector in %xmm0 detects a NUL in any of the
   64 bytes.  On a hit, control returns to L(Shl3Start) with %xmm1/%xmm2
   unmodified and %rcx/%rdx not advanced.  Otherwise the palignr-merged
   blocks are stored and %xmm1 takes the unshifted last block (saved in
   %xmm7).  */
632 .p2align 4
633L(Shl3LoopStart):
634 movaps 13(%rcx), %xmm2
635 movaps 29(%rcx), %xmm3
636 movaps %xmm3, %xmm6
637 movaps 45(%rcx), %xmm4
638 movaps %xmm4, %xmm7
639 movaps 61(%rcx), %xmm5
640 pminub %xmm2, %xmm6
641 pminub %xmm5, %xmm7
642 pminub %xmm6, %xmm7
643 pcmpeqb %xmm0, %xmm7
644 pmovmskb %xmm7, %rax
645 movaps %xmm5, %xmm7
646 palignr $3, %xmm4, %xmm5
647 test %rax, %rax
648 palignr $3, %xmm3, %xmm4
649 jnz L(Shl3Start)
650# ifdef USE_AS_STRNCPY
651 sub $64, %r8
652 jbe L(StrncpyLeave3)
653# endif
654 palignr $3, %xmm2, %xmm3
655 lea 64(%rcx), %rcx
656 palignr $3, %xmm1, %xmm2
657 movaps %xmm7, %xmm1
658 movaps %xmm5, 48(%rdx)
659 movaps %xmm4, 32(%rdx)
660 movaps %xmm3, 16(%rdx)
661 movaps %xmm2, (%rdx)
662 lea 64(%rdx), %rdx
663 jmp L(Shl3LoopStart)
664
/* A NUL was seen in the aligned block at 13(%rcx).  Copy the 16 bytes at
   -3(%rcx) with unaligned moves, set %rsi = 13, and continue at
   L(CopyFrom1To16Bytes) (outside this section).  */
665L(Shl3LoopExit):
666 movdqu -3(%rcx), %xmm1
667 mov $13, %rsi
668 movdqu %xmm1, -3(%rdx)
669 jmp L(CopyFrom1To16Bytes)
670
/* L(Shl4): the source is 4 bytes past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 = aligned block containing the byte at %rcx,
   %xmm2 = the following aligned block; `palignr $4' merges them into the
   16 source bytes starting at %rcx for an aligned store.  Each of the
   four unrolled steps tests the newly loaded block for a NUL first
   (%xmm0 is all-zero before every pcmpeqb) and leaves via
   L(Shl4LoopExit) if one is present.  L(Shl4Start) is also re-entered
   from L(Shl4LoopStart).  */
671 .p2align 4
672L(Shl4):
673 movaps -4(%rcx), %xmm1
674 movaps 12(%rcx), %xmm2
675L(Shl4Start):
676 pcmpeqb %xmm2, %xmm0
677 pmovmskb %xmm0, %rax
678 movaps %xmm2, %xmm3
679# ifdef USE_AS_STRNCPY
680 sub $16, %r8
681 jbe L(StrncpyExit4Case2OrCase3)
682# endif
683 test %rax, %rax
684 jnz L(Shl4LoopExit)
685
686 palignr $4, %xmm1, %xmm2
687 movaps %xmm2, (%rdx)
688 movaps 28(%rcx), %xmm2
689
690 pcmpeqb %xmm2, %xmm0
691 lea 16(%rdx), %rdx
692 pmovmskb %xmm0, %rax
693 lea 16(%rcx), %rcx
694 movaps %xmm2, %xmm1
695# ifdef USE_AS_STRNCPY
696 sub $16, %r8
697 jbe L(StrncpyExit4Case2OrCase3)
698# endif
699 test %rax, %rax
700 jnz L(Shl4LoopExit)
701
702 palignr $4, %xmm3, %xmm2
703 movaps %xmm2, (%rdx)
704 movaps 28(%rcx), %xmm2
705
706 pcmpeqb %xmm2, %xmm0
707 lea 16(%rdx), %rdx
708 pmovmskb %xmm0, %rax
709 lea 16(%rcx), %rcx
710 movaps %xmm2, %xmm3
711# ifdef USE_AS_STRNCPY
712 sub $16, %r8
713 jbe L(StrncpyExit4Case2OrCase3)
714# endif
715 test %rax, %rax
716 jnz L(Shl4LoopExit)
717
718 palignr $4, %xmm1, %xmm2
719 movaps %xmm2, (%rdx)
720 movaps 28(%rcx), %xmm2
721
722 pcmpeqb %xmm2, %xmm0
723 lea 16(%rdx), %rdx
724 pmovmskb %xmm0, %rax
725 lea 16(%rcx), %rcx
726# ifdef USE_AS_STRNCPY
727 sub $16, %r8
728 jbe L(StrncpyExit4Case2OrCase3)
729# endif
730 test %rax, %rax
731 jnz L(Shl4LoopExit)
732
733 palignr $4, %xmm3, %xmm2
734 movaps %xmm2, (%rdx)
735 lea 28(%rcx), %rcx
736 lea 16(%rdx), %rdx
737
/* Round the aligned load address down to a 64-byte boundary; %rax is the
   number of bytes stepped back, applied to %rdx as well and (strncpy)
   added back to %r8.  */
738 mov %rcx, %rax
739 and $-0x40, %rcx
740 sub %rcx, %rax
741 lea -12(%rcx), %rcx
742 sub %rax, %rdx
743# ifdef USE_AS_STRNCPY
744 add %rax, %r8
745# endif
746 movaps -4(%rcx), %xmm1
747
748/* 64 bytes loop */
/* Four aligned loads into %xmm2..%xmm5, folded with pminub so one
   pcmpeqb against the zero vector in %xmm0 detects a NUL in any of the
   64 bytes.  On a hit, control returns to L(Shl4Start) with %xmm1/%xmm2
   unmodified and %rcx/%rdx not advanced.  Otherwise the palignr-merged
   blocks are stored and %xmm1 takes the unshifted last block (saved in
   %xmm7).  */
749 .p2align 4
750L(Shl4LoopStart):
751 movaps 12(%rcx), %xmm2
752 movaps 28(%rcx), %xmm3
753 movaps %xmm3, %xmm6
754 movaps 44(%rcx), %xmm4
755 movaps %xmm4, %xmm7
756 movaps 60(%rcx), %xmm5
757 pminub %xmm2, %xmm6
758 pminub %xmm5, %xmm7
759 pminub %xmm6, %xmm7
760 pcmpeqb %xmm0, %xmm7
761 pmovmskb %xmm7, %rax
762 movaps %xmm5, %xmm7
763 palignr $4, %xmm4, %xmm5
764 test %rax, %rax
765 palignr $4, %xmm3, %xmm4
766 jnz L(Shl4Start)
767# ifdef USE_AS_STRNCPY
768 sub $64, %r8
769 jbe L(StrncpyLeave4)
770# endif
771 palignr $4, %xmm2, %xmm3
772 lea 64(%rcx), %rcx
773 palignr $4, %xmm1, %xmm2
774 movaps %xmm7, %xmm1
775 movaps %xmm5, 48(%rdx)
776 movaps %xmm4, 32(%rdx)
777 movaps %xmm3, 16(%rdx)
778 movaps %xmm2, (%rdx)
779 lea 64(%rdx), %rdx
780 jmp L(Shl4LoopStart)
781
/* A NUL was seen in the aligned block at 12(%rcx).  Copy the 16 bytes at
   -4(%rcx) with unaligned moves, set %rsi = 12, and continue at
   L(CopyFrom1To16Bytes) (outside this section).  */
782L(Shl4LoopExit):
783 movdqu -4(%rcx), %xmm1
784 mov $12, %rsi
785 movdqu %xmm1, -4(%rdx)
786 jmp L(CopyFrom1To16Bytes)
787
/* L(Shl5): the source is 5 bytes past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 = aligned block containing the byte at %rcx,
   %xmm2 = the following aligned block; `palignr $5' merges them into the
   16 source bytes starting at %rcx for an aligned store.  Each of the
   four unrolled steps tests the newly loaded block for a NUL first
   (%xmm0 is all-zero before every pcmpeqb) and leaves via
   L(Shl5LoopExit) if one is present.  L(Shl5Start) is also re-entered
   from L(Shl5LoopStart).  */
788 .p2align 4
789L(Shl5):
790 movaps -5(%rcx), %xmm1
791 movaps 11(%rcx), %xmm2
792L(Shl5Start):
793 pcmpeqb %xmm2, %xmm0
794 pmovmskb %xmm0, %rax
795 movaps %xmm2, %xmm3
796# ifdef USE_AS_STRNCPY
797 sub $16, %r8
798 jbe L(StrncpyExit5Case2OrCase3)
799# endif
800 test %rax, %rax
801 jnz L(Shl5LoopExit)
802
803 palignr $5, %xmm1, %xmm2
804 movaps %xmm2, (%rdx)
805 movaps 27(%rcx), %xmm2
806
807 pcmpeqb %xmm2, %xmm0
808 lea 16(%rdx), %rdx
809 pmovmskb %xmm0, %rax
810 lea 16(%rcx), %rcx
811 movaps %xmm2, %xmm1
812# ifdef USE_AS_STRNCPY
813 sub $16, %r8
814 jbe L(StrncpyExit5Case2OrCase3)
815# endif
816 test %rax, %rax
817 jnz L(Shl5LoopExit)
818
819 palignr $5, %xmm3, %xmm2
820 movaps %xmm2, (%rdx)
821 movaps 27(%rcx), %xmm2
822
823 pcmpeqb %xmm2, %xmm0
824 lea 16(%rdx), %rdx
825 pmovmskb %xmm0, %rax
826 lea 16(%rcx), %rcx
827 movaps %xmm2, %xmm3
828# ifdef USE_AS_STRNCPY
829 sub $16, %r8
830 jbe L(StrncpyExit5Case2OrCase3)
831# endif
832 test %rax, %rax
833 jnz L(Shl5LoopExit)
834
835 palignr $5, %xmm1, %xmm2
836 movaps %xmm2, (%rdx)
837 movaps 27(%rcx), %xmm2
838
839 pcmpeqb %xmm2, %xmm0
840 lea 16(%rdx), %rdx
841 pmovmskb %xmm0, %rax
842 lea 16(%rcx), %rcx
843# ifdef USE_AS_STRNCPY
844 sub $16, %r8
845 jbe L(StrncpyExit5Case2OrCase3)
846# endif
847 test %rax, %rax
848 jnz L(Shl5LoopExit)
849
850 palignr $5, %xmm3, %xmm2
851 movaps %xmm2, (%rdx)
852 lea 27(%rcx), %rcx
853 lea 16(%rdx), %rdx
854
/* Round the aligned load address down to a 64-byte boundary; %rax is the
   number of bytes stepped back, applied to %rdx as well and (strncpy)
   added back to %r8.  */
855 mov %rcx, %rax
856 and $-0x40, %rcx
857 sub %rcx, %rax
858 lea -11(%rcx), %rcx
859 sub %rax, %rdx
860# ifdef USE_AS_STRNCPY
861 add %rax, %r8
862# endif
863 movaps -5(%rcx), %xmm1
864
865/* 64 bytes loop */
/* Four aligned loads into %xmm2..%xmm5, folded with pminub so one
   pcmpeqb against the zero vector in %xmm0 detects a NUL in any of the
   64 bytes.  On a hit, control returns to L(Shl5Start) with %xmm1/%xmm2
   unmodified and %rcx/%rdx not advanced.  Otherwise the palignr-merged
   blocks are stored and %xmm1 takes the unshifted last block (saved in
   %xmm7).  */
866 .p2align 4
867L(Shl5LoopStart):
868 movaps 11(%rcx), %xmm2
869 movaps 27(%rcx), %xmm3
870 movaps %xmm3, %xmm6
871 movaps 43(%rcx), %xmm4
872 movaps %xmm4, %xmm7
873 movaps 59(%rcx), %xmm5
874 pminub %xmm2, %xmm6
875 pminub %xmm5, %xmm7
876 pminub %xmm6, %xmm7
877 pcmpeqb %xmm0, %xmm7
878 pmovmskb %xmm7, %rax
879 movaps %xmm5, %xmm7
880 palignr $5, %xmm4, %xmm5
881 test %rax, %rax
882 palignr $5, %xmm3, %xmm4
883 jnz L(Shl5Start)
884# ifdef USE_AS_STRNCPY
885 sub $64, %r8
886 jbe L(StrncpyLeave5)
887# endif
888 palignr $5, %xmm2, %xmm3
889 lea 64(%rcx), %rcx
890 palignr $5, %xmm1, %xmm2
891 movaps %xmm7, %xmm1
892 movaps %xmm5, 48(%rdx)
893 movaps %xmm4, 32(%rdx)
894 movaps %xmm3, 16(%rdx)
895 movaps %xmm2, (%rdx)
896 lea 64(%rdx), %rdx
897 jmp L(Shl5LoopStart)
898
/* A NUL was seen in the aligned block at 11(%rcx).  Copy the 16 bytes at
   -5(%rcx) with unaligned moves, set %rsi = 11, and continue at
   L(CopyFrom1To16Bytes) (outside this section).  */
899L(Shl5LoopExit):
900 movdqu -5(%rcx), %xmm1
901 mov $11, %rsi
902 movdqu %xmm1, -5(%rdx)
903 jmp L(CopyFrom1To16Bytes)
904
/* L(Shl6): the source is 6 bytes past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 = aligned block containing the byte at %rcx,
   %xmm2 = the following aligned block; `palignr $6' merges them into the
   16 source bytes starting at %rcx for an aligned store.  Each of the
   four unrolled steps tests the newly loaded block for a NUL first
   (%xmm0 is all-zero before every pcmpeqb) and leaves via
   L(Shl6LoopExit) if one is present.  L(Shl6Start) is also re-entered
   from L(Shl6LoopStart).  */
905 .p2align 4
906L(Shl6):
907 movaps -6(%rcx), %xmm1
908 movaps 10(%rcx), %xmm2
909L(Shl6Start):
910 pcmpeqb %xmm2, %xmm0
911 pmovmskb %xmm0, %rax
912 movaps %xmm2, %xmm3
913# ifdef USE_AS_STRNCPY
914 sub $16, %r8
915 jbe L(StrncpyExit6Case2OrCase3)
916# endif
917 test %rax, %rax
918 jnz L(Shl6LoopExit)
919
920 palignr $6, %xmm1, %xmm2
921 movaps %xmm2, (%rdx)
922 movaps 26(%rcx), %xmm2
923
924 pcmpeqb %xmm2, %xmm0
925 lea 16(%rdx), %rdx
926 pmovmskb %xmm0, %rax
927 lea 16(%rcx), %rcx
928 movaps %xmm2, %xmm1
929# ifdef USE_AS_STRNCPY
930 sub $16, %r8
931 jbe L(StrncpyExit6Case2OrCase3)
932# endif
933 test %rax, %rax
934 jnz L(Shl6LoopExit)
935
936 palignr $6, %xmm3, %xmm2
937 movaps %xmm2, (%rdx)
938 movaps 26(%rcx), %xmm2
939
940 pcmpeqb %xmm2, %xmm0
941 lea 16(%rdx), %rdx
942 pmovmskb %xmm0, %rax
943 lea 16(%rcx), %rcx
944 movaps %xmm2, %xmm3
945# ifdef USE_AS_STRNCPY
946 sub $16, %r8
947 jbe L(StrncpyExit6Case2OrCase3)
948# endif
949 test %rax, %rax
950 jnz L(Shl6LoopExit)
951
952 palignr $6, %xmm1, %xmm2
953 movaps %xmm2, (%rdx)
954 movaps 26(%rcx), %xmm2
955
956 pcmpeqb %xmm2, %xmm0
957 lea 16(%rdx), %rdx
958 pmovmskb %xmm0, %rax
959 lea 16(%rcx), %rcx
960# ifdef USE_AS_STRNCPY
961 sub $16, %r8
962 jbe L(StrncpyExit6Case2OrCase3)
963# endif
964 test %rax, %rax
965 jnz L(Shl6LoopExit)
966
967 palignr $6, %xmm3, %xmm2
968 movaps %xmm2, (%rdx)
969 lea 26(%rcx), %rcx
970 lea 16(%rdx), %rdx
971
/* Round the aligned load address down to a 64-byte boundary; %rax is the
   number of bytes stepped back, applied to %rdx as well and (strncpy)
   added back to %r8.  */
972 mov %rcx, %rax
973 and $-0x40, %rcx
974 sub %rcx, %rax
975 lea -10(%rcx), %rcx
976 sub %rax, %rdx
977# ifdef USE_AS_STRNCPY
978 add %rax, %r8
979# endif
980 movaps -6(%rcx), %xmm1
981
982/* 64 bytes loop */
/* Four aligned loads into %xmm2..%xmm5, folded with pminub so one
   pcmpeqb against the zero vector in %xmm0 detects a NUL in any of the
   64 bytes.  On a hit, control returns to L(Shl6Start) with %xmm1/%xmm2
   unmodified and %rcx/%rdx not advanced.  Otherwise the palignr-merged
   blocks are stored and %xmm1 takes the unshifted last block (saved in
   %xmm7).  */
983 .p2align 4
984L(Shl6LoopStart):
985 movaps 10(%rcx), %xmm2
986 movaps 26(%rcx), %xmm3
987 movaps %xmm3, %xmm6
988 movaps 42(%rcx), %xmm4
989 movaps %xmm4, %xmm7
990 movaps 58(%rcx), %xmm5
991 pminub %xmm2, %xmm6
992 pminub %xmm5, %xmm7
993 pminub %xmm6, %xmm7
994 pcmpeqb %xmm0, %xmm7
995 pmovmskb %xmm7, %rax
996 movaps %xmm5, %xmm7
997 palignr $6, %xmm4, %xmm5
998 test %rax, %rax
999 palignr $6, %xmm3, %xmm4
1000 jnz L(Shl6Start)
1001# ifdef USE_AS_STRNCPY
1002 sub $64, %r8
1003 jbe L(StrncpyLeave6)
1004# endif
1005 palignr $6, %xmm2, %xmm3
1006 lea 64(%rcx), %rcx
1007 palignr $6, %xmm1, %xmm2
1008 movaps %xmm7, %xmm1
1009 movaps %xmm5, 48(%rdx)
1010 movaps %xmm4, 32(%rdx)
1011 movaps %xmm3, 16(%rdx)
1012 movaps %xmm2, (%rdx)
1013 lea 64(%rdx), %rdx
1014 jmp L(Shl6LoopStart)
1015
/* A NUL was seen in the aligned block at 10(%rcx).  Copy source bytes
   0..9 using an 8-byte move (bytes 0..7) and an overlapping 4-byte move
   (bytes 6..9); %rsi doubles as the 4-byte scratch before being set to
   10.  Continues at L(CopyFrom1To16Bytes) (outside this section).  */
1016L(Shl6LoopExit):
1017 mov (%rcx), %r9
1018 mov 6(%rcx), %esi
1019 mov %r9, (%rdx)
1020 mov %esi, 6(%rdx)
1021 mov $10, %rsi
1022 jmp L(CopyFrom1To16Bytes)
1023
/* L(Shl7): the source is 7 bytes past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 = aligned block containing the byte at %rcx,
   %xmm2 = the following aligned block; `palignr $7' merges them into the
   16 source bytes starting at %rcx for an aligned store.  Each of the
   four unrolled steps tests the newly loaded block for a NUL first
   (%xmm0 is all-zero before every pcmpeqb) and leaves via
   L(Shl7LoopExit) if one is present.  L(Shl7Start) is also re-entered
   from L(Shl7LoopStart).  */
1024 .p2align 4
1025L(Shl7):
1026 movaps -7(%rcx), %xmm1
1027 movaps 9(%rcx), %xmm2
1028L(Shl7Start):
1029 pcmpeqb %xmm2, %xmm0
1030 pmovmskb %xmm0, %rax
1031 movaps %xmm2, %xmm3
1032# ifdef USE_AS_STRNCPY
1033 sub $16, %r8
1034 jbe L(StrncpyExit7Case2OrCase3)
1035# endif
1036 test %rax, %rax
1037 jnz L(Shl7LoopExit)
1038
1039 palignr $7, %xmm1, %xmm2
1040 movaps %xmm2, (%rdx)
1041 movaps 25(%rcx), %xmm2
1042
1043 pcmpeqb %xmm2, %xmm0
1044 lea 16(%rdx), %rdx
1045 pmovmskb %xmm0, %rax
1046 lea 16(%rcx), %rcx
1047 movaps %xmm2, %xmm1
1048# ifdef USE_AS_STRNCPY
1049 sub $16, %r8
1050 jbe L(StrncpyExit7Case2OrCase3)
1051# endif
1052 test %rax, %rax
1053 jnz L(Shl7LoopExit)
1054
1055 palignr $7, %xmm3, %xmm2
1056 movaps %xmm2, (%rdx)
1057 movaps 25(%rcx), %xmm2
1058
1059 pcmpeqb %xmm2, %xmm0
1060 lea 16(%rdx), %rdx
1061 pmovmskb %xmm0, %rax
1062 lea 16(%rcx), %rcx
1063 movaps %xmm2, %xmm3
1064# ifdef USE_AS_STRNCPY
1065 sub $16, %r8
1066 jbe L(StrncpyExit7Case2OrCase3)
1067# endif
1068 test %rax, %rax
1069 jnz L(Shl7LoopExit)
1070
1071 palignr $7, %xmm1, %xmm2
1072 movaps %xmm2, (%rdx)
1073 movaps 25(%rcx), %xmm2
1074
1075 pcmpeqb %xmm2, %xmm0
1076 lea 16(%rdx), %rdx
1077 pmovmskb %xmm0, %rax
1078 lea 16(%rcx), %rcx
1079# ifdef USE_AS_STRNCPY
1080 sub $16, %r8
1081 jbe L(StrncpyExit7Case2OrCase3)
1082# endif
1083 test %rax, %rax
1084 jnz L(Shl7LoopExit)
1085
1086 palignr $7, %xmm3, %xmm2
1087 movaps %xmm2, (%rdx)
1088 lea 25(%rcx), %rcx
1089 lea 16(%rdx), %rdx
1090
/* Round the aligned load address down to a 64-byte boundary; %rax is the
   number of bytes stepped back, applied to %rdx as well and (strncpy)
   added back to %r8.  */
1091 mov %rcx, %rax
1092 and $-0x40, %rcx
1093 sub %rcx, %rax
1094 lea -9(%rcx), %rcx
1095 sub %rax, %rdx
1096# ifdef USE_AS_STRNCPY
1097 add %rax, %r8
1098# endif
1099 movaps -7(%rcx), %xmm1
1100
1101/* 64 bytes loop */
/* Four aligned loads into %xmm2..%xmm5, folded with pminub so one
   pcmpeqb against the zero vector in %xmm0 detects a NUL in any of the
   64 bytes.  On a hit, control returns to L(Shl7Start) with %xmm1/%xmm2
   unmodified and %rcx/%rdx not advanced.  Otherwise the palignr-merged
   blocks are stored and %xmm1 takes the unshifted last block (saved in
   %xmm7).  */
1102 .p2align 4
1103L(Shl7LoopStart):
1104 movaps 9(%rcx), %xmm2
1105 movaps 25(%rcx), %xmm3
1106 movaps %xmm3, %xmm6
1107 movaps 41(%rcx), %xmm4
1108 movaps %xmm4, %xmm7
1109 movaps 57(%rcx), %xmm5
1110 pminub %xmm2, %xmm6
1111 pminub %xmm5, %xmm7
1112 pminub %xmm6, %xmm7
1113 pcmpeqb %xmm0, %xmm7
1114 pmovmskb %xmm7, %rax
1115 movaps %xmm5, %xmm7
1116 palignr $7, %xmm4, %xmm5
1117 test %rax, %rax
1118 palignr $7, %xmm3, %xmm4
1119 jnz L(Shl7Start)
1120# ifdef USE_AS_STRNCPY
1121 sub $64, %r8
1122 jbe L(StrncpyLeave7)
1123# endif
1124 palignr $7, %xmm2, %xmm3
1125 lea 64(%rcx), %rcx
1126 palignr $7, %xmm1, %xmm2
1127 movaps %xmm7, %xmm1
1128 movaps %xmm5, 48(%rdx)
1129 movaps %xmm4, 32(%rdx)
1130 movaps %xmm3, 16(%rdx)
1131 movaps %xmm2, (%rdx)
1132 lea 64(%rdx), %rdx
1133 jmp L(Shl7LoopStart)
1134
/* A NUL was seen in the aligned block at 9(%rcx).  Copy source bytes
   0..8 using an 8-byte move (bytes 0..7) and an overlapping 4-byte move
   (bytes 5..8); %rsi doubles as the 4-byte scratch before being set to
   9.  Continues at L(CopyFrom1To16Bytes) (outside this section).  */
1135L(Shl7LoopExit):
1136 mov (%rcx), %r9
1137 mov 5(%rcx), %esi
1138 mov %r9, (%rdx)
1139 mov %esi, 5(%rdx)
1140 mov $9, %rsi
1141 jmp L(CopyFrom1To16Bytes)
1142
/* L(Shl8): the source is 8 bytes past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 = aligned block containing the byte at %rcx,
   %xmm2 = the following aligned block; `palignr $8' merges them into the
   16 source bytes starting at %rcx for an aligned store.  Each of the
   four unrolled steps tests the newly loaded block for a NUL first
   (%xmm0 is all-zero before every pcmpeqb) and leaves via
   L(Shl8LoopExit) if one is present.  L(Shl8Start) is also re-entered
   from L(Shl8LoopStart).  */
1143 .p2align 4
1144L(Shl8):
1145 movaps -8(%rcx), %xmm1
1146 movaps 8(%rcx), %xmm2
1147L(Shl8Start):
1148 pcmpeqb %xmm2, %xmm0
1149 pmovmskb %xmm0, %rax
1150 movaps %xmm2, %xmm3
1151# ifdef USE_AS_STRNCPY
1152 sub $16, %r8
1153 jbe L(StrncpyExit8Case2OrCase3)
1154# endif
1155 test %rax, %rax
1156 jnz L(Shl8LoopExit)
1157
1158 palignr $8, %xmm1, %xmm2
1159 movaps %xmm2, (%rdx)
1160 movaps 24(%rcx), %xmm2
1161
1162 pcmpeqb %xmm2, %xmm0
1163 lea 16(%rdx), %rdx
1164 pmovmskb %xmm0, %rax
1165 lea 16(%rcx), %rcx
1166 movaps %xmm2, %xmm1
1167# ifdef USE_AS_STRNCPY
1168 sub $16, %r8
1169 jbe L(StrncpyExit8Case2OrCase3)
1170# endif
1171 test %rax, %rax
1172 jnz L(Shl8LoopExit)
1173
1174 palignr $8, %xmm3, %xmm2
1175 movaps %xmm2, (%rdx)
1176 movaps 24(%rcx), %xmm2
1177
1178 pcmpeqb %xmm2, %xmm0
1179 lea 16(%rdx), %rdx
1180 pmovmskb %xmm0, %rax
1181 lea 16(%rcx), %rcx
1182 movaps %xmm2, %xmm3
1183# ifdef USE_AS_STRNCPY
1184 sub $16, %r8
1185 jbe L(StrncpyExit8Case2OrCase3)
1186# endif
1187 test %rax, %rax
1188 jnz L(Shl8LoopExit)
1189
1190 palignr $8, %xmm1, %xmm2
1191 movaps %xmm2, (%rdx)
1192 movaps 24(%rcx), %xmm2
1193
1194 pcmpeqb %xmm2, %xmm0
1195 lea 16(%rdx), %rdx
1196 pmovmskb %xmm0, %rax
1197 lea 16(%rcx), %rcx
1198# ifdef USE_AS_STRNCPY
1199 sub $16, %r8
1200 jbe L(StrncpyExit8Case2OrCase3)
1201# endif
1202 test %rax, %rax
1203 jnz L(Shl8LoopExit)
1204
1205 palignr $8, %xmm3, %xmm2
1206 movaps %xmm2, (%rdx)
1207 lea 24(%rcx), %rcx
1208 lea 16(%rdx), %rdx
1209
/* Round the aligned load address down to a 64-byte boundary; %rax is the
   number of bytes stepped back, applied to %rdx as well and (strncpy)
   added back to %r8.  */
1210 mov %rcx, %rax
1211 and $-0x40, %rcx
1212 sub %rcx, %rax
1213 lea -8(%rcx), %rcx
1214 sub %rax, %rdx
1215# ifdef USE_AS_STRNCPY
1216 add %rax, %r8
1217# endif
1218 movaps -8(%rcx), %xmm1
1219
1220/* 64 bytes loop */
/* Four aligned loads into %xmm2..%xmm5, folded with pminub so one
   pcmpeqb against the zero vector in %xmm0 detects a NUL in any of the
   64 bytes.  On a hit, control returns to L(Shl8Start) with %xmm1/%xmm2
   unmodified and %rcx/%rdx not advanced.  Otherwise the palignr-merged
   blocks are stored and %xmm1 takes the unshifted last block (saved in
   %xmm7).  */
1221 .p2align 4
1222L(Shl8LoopStart):
1223 movaps 8(%rcx), %xmm2
1224 movaps 24(%rcx), %xmm3
1225 movaps %xmm3, %xmm6
1226 movaps 40(%rcx), %xmm4
1227 movaps %xmm4, %xmm7
1228 movaps 56(%rcx), %xmm5
1229 pminub %xmm2, %xmm6
1230 pminub %xmm5, %xmm7
1231 pminub %xmm6, %xmm7
1232 pcmpeqb %xmm0, %xmm7
1233 pmovmskb %xmm7, %rax
1234 movaps %xmm5, %xmm7
1235 palignr $8, %xmm4, %xmm5
1236 test %rax, %rax
1237 palignr $8, %xmm3, %xmm4
1238 jnz L(Shl8Start)
1239# ifdef USE_AS_STRNCPY
1240 sub $64, %r8
1241 jbe L(StrncpyLeave8)
1242# endif
1243 palignr $8, %xmm2, %xmm3
1244 lea 64(%rcx), %rcx
1245 palignr $8, %xmm1, %xmm2
1246 movaps %xmm7, %xmm1
1247 movaps %xmm5, 48(%rdx)
1248 movaps %xmm4, 32(%rdx)
1249 movaps %xmm3, 16(%rdx)
1250 movaps %xmm2, (%rdx)
1251 lea 64(%rdx), %rdx
1252 jmp L(Shl8LoopStart)
1253
/* A NUL was seen in the aligned block at 8(%rcx).  Copy source bytes
   0..7 with one 8-byte move, set %rsi = 8, and continue at
   L(CopyFrom1To16Bytes) (outside this section).  */
1254L(Shl8LoopExit):
1255 mov (%rcx), %r9
1256 mov $8, %rsi
1257 mov %r9, (%rdx)
1258 jmp L(CopyFrom1To16Bytes)
1259
/* L(Shl9): the source is 9 bytes past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 = aligned block containing the byte at %rcx,
   %xmm2 = the following aligned block; `palignr $9' merges them into the
   16 source bytes starting at %rcx for an aligned store.  Each of the
   four unrolled steps tests the newly loaded block for a NUL first
   (%xmm0 is all-zero before every pcmpeqb) and leaves via
   L(Shl9LoopExit) if one is present.  L(Shl9Start) is also re-entered
   from L(Shl9LoopStart).  */
1260 .p2align 4
1261L(Shl9):
1262 movaps -9(%rcx), %xmm1
1263 movaps 7(%rcx), %xmm2
1264L(Shl9Start):
1265 pcmpeqb %xmm2, %xmm0
1266 pmovmskb %xmm0, %rax
1267 movaps %xmm2, %xmm3
1268# ifdef USE_AS_STRNCPY
1269 sub $16, %r8
1270 jbe L(StrncpyExit9Case2OrCase3)
1271# endif
1272 test %rax, %rax
1273 jnz L(Shl9LoopExit)
1274
1275 palignr $9, %xmm1, %xmm2
1276 movaps %xmm2, (%rdx)
1277 movaps 23(%rcx), %xmm2
1278
1279 pcmpeqb %xmm2, %xmm0
1280 lea 16(%rdx), %rdx
1281 pmovmskb %xmm0, %rax
1282 lea 16(%rcx), %rcx
1283 movaps %xmm2, %xmm1
1284# ifdef USE_AS_STRNCPY
1285 sub $16, %r8
1286 jbe L(StrncpyExit9Case2OrCase3)
1287# endif
1288 test %rax, %rax
1289 jnz L(Shl9LoopExit)
1290
1291 palignr $9, %xmm3, %xmm2
1292 movaps %xmm2, (%rdx)
1293 movaps 23(%rcx), %xmm2
1294
1295 pcmpeqb %xmm2, %xmm0
1296 lea 16(%rdx), %rdx
1297 pmovmskb %xmm0, %rax
1298 lea 16(%rcx), %rcx
1299 movaps %xmm2, %xmm3
1300# ifdef USE_AS_STRNCPY
1301 sub $16, %r8
1302 jbe L(StrncpyExit9Case2OrCase3)
1303# endif
1304 test %rax, %rax
1305 jnz L(Shl9LoopExit)
1306
1307 palignr $9, %xmm1, %xmm2
1308 movaps %xmm2, (%rdx)
1309 movaps 23(%rcx), %xmm2
1310
1311 pcmpeqb %xmm2, %xmm0
1312 lea 16(%rdx), %rdx
1313 pmovmskb %xmm0, %rax
1314 lea 16(%rcx), %rcx
1315# ifdef USE_AS_STRNCPY
1316 sub $16, %r8
1317 jbe L(StrncpyExit9Case2OrCase3)
1318# endif
1319 test %rax, %rax
1320 jnz L(Shl9LoopExit)
1321
1322 palignr $9, %xmm3, %xmm2
1323 movaps %xmm2, (%rdx)
1324 lea 23(%rcx), %rcx
1325 lea 16(%rdx), %rdx
1326
/* Round the aligned load address down to a 64-byte boundary; %rax is the
   number of bytes stepped back, applied to %rdx as well and (strncpy)
   added back to %r8.  */
1327 mov %rcx, %rax
1328 and $-0x40, %rcx
1329 sub %rcx, %rax
1330 lea -7(%rcx), %rcx
1331 sub %rax, %rdx
1332# ifdef USE_AS_STRNCPY
1333 add %rax, %r8
1334# endif
1335 movaps -9(%rcx), %xmm1
1336
1337/* 64 bytes loop */
/* Four aligned loads into %xmm2..%xmm5, folded with pminub so one
   pcmpeqb against the zero vector in %xmm0 detects a NUL in any of the
   64 bytes.  On a hit, control returns to L(Shl9Start) with %xmm1/%xmm2
   unmodified and %rcx/%rdx not advanced.  Otherwise the palignr-merged
   blocks are stored and %xmm1 takes the unshifted last block (saved in
   %xmm7).  */
1338 .p2align 4
1339L(Shl9LoopStart):
1340 movaps 7(%rcx), %xmm2
1341 movaps 23(%rcx), %xmm3
1342 movaps %xmm3, %xmm6
1343 movaps 39(%rcx), %xmm4
1344 movaps %xmm4, %xmm7
1345 movaps 55(%rcx), %xmm5
1346 pminub %xmm2, %xmm6
1347 pminub %xmm5, %xmm7
1348 pminub %xmm6, %xmm7
1349 pcmpeqb %xmm0, %xmm7
1350 pmovmskb %xmm7, %rax
1351 movaps %xmm5, %xmm7
1352 palignr $9, %xmm4, %xmm5
1353 test %rax, %rax
1354 palignr $9, %xmm3, %xmm4
1355 jnz L(Shl9Start)
1356# ifdef USE_AS_STRNCPY
1357 sub $64, %r8
1358 jbe L(StrncpyLeave9)
1359# endif
1360 palignr $9, %xmm2, %xmm3
1361 lea 64(%rcx), %rcx
1362 palignr $9, %xmm1, %xmm2
1363 movaps %xmm7, %xmm1
1364 movaps %xmm5, 48(%rdx)
1365 movaps %xmm4, 32(%rdx)
1366 movaps %xmm3, 16(%rdx)
1367 movaps %xmm2, (%rdx)
1368 lea 64(%rdx), %rdx
1369 jmp L(Shl9LoopStart)
1370
/* A NUL was seen in the aligned block at 7(%rcx).  Copy the 8 bytes at
   -1(%rcx) (offsets -1..6, ending just before that block), set %rsi = 7,
   and continue at L(CopyFrom1To16Bytes) (outside this section).  */
1371L(Shl9LoopExit):
1372 mov -1(%rcx), %r9
1373 mov $7, %rsi
1374 mov %r9, -1(%rdx)
1375 jmp L(CopyFrom1To16Bytes)
1376
/* L(Shl10): the source is 10 bytes past a 16-byte boundary while %rdx is
   16-byte aligned.  %xmm1 = aligned block containing the byte at %rcx,
   %xmm2 = the following aligned block; `palignr $10' merges them into
   the 16 source bytes starting at %rcx for an aligned store.  Each of
   the four unrolled steps tests the newly loaded block for a NUL first
   (%xmm0 is all-zero before every pcmpeqb) and leaves via
   L(Shl10LoopExit) if one is present.  L(Shl10Start) is also re-entered
   from L(Shl10LoopStart).  */
1377 .p2align 4
1378L(Shl10):
1379 movaps -10(%rcx), %xmm1
1380 movaps 6(%rcx), %xmm2
1381L(Shl10Start):
1382 pcmpeqb %xmm2, %xmm0
1383 pmovmskb %xmm0, %rax
1384 movaps %xmm2, %xmm3
1385# ifdef USE_AS_STRNCPY
1386 sub $16, %r8
1387 jbe L(StrncpyExit10Case2OrCase3)
1388# endif
1389 test %rax, %rax
1390 jnz L(Shl10LoopExit)
1391
1392 palignr $10, %xmm1, %xmm2
1393 movaps %xmm2, (%rdx)
1394 movaps 22(%rcx), %xmm2
1395
1396 pcmpeqb %xmm2, %xmm0
1397 lea 16(%rdx), %rdx
1398 pmovmskb %xmm0, %rax
1399 lea 16(%rcx), %rcx
1400 movaps %xmm2, %xmm1
1401# ifdef USE_AS_STRNCPY
1402 sub $16, %r8
1403 jbe L(StrncpyExit10Case2OrCase3)
1404# endif
1405 test %rax, %rax
1406 jnz L(Shl10LoopExit)
1407
1408 palignr $10, %xmm3, %xmm2
1409 movaps %xmm2, (%rdx)
1410 movaps 22(%rcx), %xmm2
1411
1412 pcmpeqb %xmm2, %xmm0
1413 lea 16(%rdx), %rdx
1414 pmovmskb %xmm0, %rax
1415 lea 16(%rcx), %rcx
1416 movaps %xmm2, %xmm3
1417# ifdef USE_AS_STRNCPY
1418 sub $16, %r8
1419 jbe L(StrncpyExit10Case2OrCase3)
1420# endif
1421 test %rax, %rax
1422 jnz L(Shl10LoopExit)
1423
1424 palignr $10, %xmm1, %xmm2
1425 movaps %xmm2, (%rdx)
1426 movaps 22(%rcx), %xmm2
1427
1428 pcmpeqb %xmm2, %xmm0
1429 lea 16(%rdx), %rdx
1430 pmovmskb %xmm0, %rax
1431 lea 16(%rcx), %rcx
1432# ifdef USE_AS_STRNCPY
1433 sub $16, %r8
1434 jbe L(StrncpyExit10Case2OrCase3)
1435# endif
1436 test %rax, %rax
1437 jnz L(Shl10LoopExit)
1438
1439 palignr $10, %xmm3, %xmm2
1440 movaps %xmm2, (%rdx)
1441 lea 22(%rcx), %rcx
1442 lea 16(%rdx), %rdx
1443
/* Round the aligned load address down to a 64-byte boundary; %rax is the
   number of bytes stepped back, applied to %rdx as well and (strncpy)
   added back to %r8.  */
1444 mov %rcx, %rax
1445 and $-0x40, %rcx
1446 sub %rcx, %rax
1447 lea -6(%rcx), %rcx
1448 sub %rax, %rdx
1449# ifdef USE_AS_STRNCPY
1450 add %rax, %r8
1451# endif
1452 movaps -10(%rcx), %xmm1
1453
1454/* 64 bytes loop */
/* Four aligned loads into %xmm2..%xmm5, folded with pminub so one
   pcmpeqb against the zero vector in %xmm0 detects a NUL in any of the
   64 bytes.  On a hit, control returns to L(Shl10Start) with %xmm1/%xmm2
   unmodified and %rcx/%rdx not advanced.  Otherwise the palignr-merged
   blocks are stored and %xmm1 takes the unshifted last block (saved in
   %xmm7).  */
1455 .p2align 4
1456L(Shl10LoopStart):
1457 movaps 6(%rcx), %xmm2
1458 movaps 22(%rcx), %xmm3
1459 movaps %xmm3, %xmm6
1460 movaps 38(%rcx), %xmm4
1461 movaps %xmm4, %xmm7
1462 movaps 54(%rcx), %xmm5
1463 pminub %xmm2, %xmm6
1464 pminub %xmm5, %xmm7
1465 pminub %xmm6, %xmm7
1466 pcmpeqb %xmm0, %xmm7
1467 pmovmskb %xmm7, %rax
1468 movaps %xmm5, %xmm7
1469 palignr $10, %xmm4, %xmm5
1470 test %rax, %rax
1471 palignr $10, %xmm3, %xmm4
1472 jnz L(Shl10Start)
1473# ifdef USE_AS_STRNCPY
1474 sub $64, %r8
1475 jbe L(StrncpyLeave10)
1476# endif
1477 palignr $10, %xmm2, %xmm3
1478 lea 64(%rcx), %rcx
1479 palignr $10, %xmm1, %xmm2
1480 movaps %xmm7, %xmm1
1481 movaps %xmm5, 48(%rdx)
1482 movaps %xmm4, 32(%rdx)
1483 movaps %xmm3, 16(%rdx)
1484 movaps %xmm2, (%rdx)
1485 lea 64(%rdx), %rdx
1486 jmp L(Shl10LoopStart)
1487
/* A NUL was seen in the aligned block at 6(%rcx).  Copy the 8 bytes at
   -2(%rcx) (offsets -2..5, ending just before that block), set %rsi = 6,
   and continue at L(CopyFrom1To16Bytes) (outside this section).  */
1488L(Shl10LoopExit):
1489 mov -2(%rcx), %r9
1490 mov $6, %rsi
1491 mov %r9, -2(%rdx)
1492 jmp L(CopyFrom1To16Bytes)
1493
/* L(Shl11): copy path used when the 16-byte-aligned source blocks lie at
   offsets -11, 5, 21, ... from %rcx.  Every vector load and store is a
   movaps, so (%rcx - 11) and (%rdx) are assumed to be 16-byte aligned on
   entry; that is arranged outside this section (TODO confirm).

   Registers as used here:
     %rcx   source cursor; the byte at (%rcx) belongs at (%rdx)
     %rdx   destination cursor
     %xmm0  NUL-search operand, expected all-zero on entry; a compare that
            finds no NUL leaves it all-zero, so it is never re-cleared
     %xmm1, %xmm3  previous raw source block (they alternate)
     %xmm2  current raw source block
     %rax   pmovmskb mask of the NUL bytes in the current block
     %r8    remaining byte budget (USE_AS_STRNCPY builds only)

   Four blocks are processed one at a time; after that the source is
   rounded down to a 64-byte boundary and L(Shl11LoopStart) checks 64 bytes
   per pass.  */
1494 .p2align 4
1495L(Shl11):
1496 movaps -11(%rcx), %xmm1
1497 movaps 5(%rcx), %xmm2
/* Also entered from L(Shl11LoopStart) with %xmm1/%xmm2 already loaded.  */
1498L(Shl11Start):
1499 pcmpeqb %xmm2, %xmm0 /* 0xff in every lane where %xmm2 holds a NUL */
1500 pmovmskb %xmm0, %rax
1501 movaps %xmm2, %xmm3 /* keep the raw block; palignr overwrites %xmm2 */
1502# ifdef USE_AS_STRNCPY
1503 sub $16, %r8
1504 jbe L(StrncpyExit11Case2OrCase3) /* 16 or fewer budget bytes were left */
1505# endif
1506 test %rax, %rax
1507 jnz L(Shl11LoopExit) /* NUL found in this block */
1508
/* %xmm2 = top 5 bytes of %xmm1 followed by the low 11 bytes of %xmm2,
   i.e. the 16 source bytes that start at (%rcx).  */
1509 palignr $11, %xmm1, %xmm2
1510 movaps %xmm2, (%rdx)
1511 movaps 21(%rcx), %xmm2
1512
/* Second block: same steps, with %xmm3 as the previous raw block.  */
1513 pcmpeqb %xmm2, %xmm0
1514 lea 16(%rdx), %rdx
1515 pmovmskb %xmm0, %rax
1516 lea 16(%rcx), %rcx
1517 movaps %xmm2, %xmm1
1518# ifdef USE_AS_STRNCPY
1519 sub $16, %r8
1520 jbe L(StrncpyExit11Case2OrCase3)
1521# endif
1522 test %rax, %rax
1523 jnz L(Shl11LoopExit)
1524
1525 palignr $11, %xmm3, %xmm2
1526 movaps %xmm2, (%rdx)
1527 movaps 21(%rcx), %xmm2
1528
/* Third block.  */
1529 pcmpeqb %xmm2, %xmm0
1530 lea 16(%rdx), %rdx
1531 pmovmskb %xmm0, %rax
1532 lea 16(%rcx), %rcx
1533 movaps %xmm2, %xmm3
1534# ifdef USE_AS_STRNCPY
1535 sub $16, %r8
1536 jbe L(StrncpyExit11Case2OrCase3)
1537# endif
1538 test %rax, %rax
1539 jnz L(Shl11LoopExit)
1540
1541 palignr $11, %xmm1, %xmm2
1542 movaps %xmm2, (%rdx)
1543 movaps 21(%rcx), %xmm2
1544
/* Fourth block.  No raw copy is kept: %xmm1 is reloaded before the loop.  */
1545 pcmpeqb %xmm2, %xmm0
1546 lea 16(%rdx), %rdx
1547 pmovmskb %xmm0, %rax
1548 lea 16(%rcx), %rcx
1549# ifdef USE_AS_STRNCPY
1550 sub $16, %r8
1551 jbe L(StrncpyExit11Case2OrCase3)
1552# endif
1553 test %rax, %rax
1554 jnz L(Shl11LoopExit)
1555
1556 palignr $11, %xmm3, %xmm2
1557 movaps %xmm2, (%rdx)
1558 lea 21(%rcx), %rcx
1559 lea 16(%rdx), %rdx
1560
/* Round the next source block address down to a 64-byte boundary.  %rax
   is the number of bytes backed up (0, 16, 32 or 48 given the alignment
   assumed above); %rdx moves back by the same amount and, for strncpy,
   %r8 grows by it, so bytes already stored are simply copied again.  */
1561 mov %rcx, %rax
1562 and $-0x40, %rcx
1563 sub %rcx, %rax
1564 lea -5(%rcx), %rcx /* restore the bias: 5(%rcx) is the 64-byte-aligned block */
1565 sub %rax, %rdx
1566# ifdef USE_AS_STRNCPY
1567 add %rax, %r8
1568# endif
1569 movaps -11(%rcx), %xmm1
1570
1571/* 64 bytes loop */
/* Load four aligned blocks, fold them with a bytewise unsigned minimum and
   compare the result with zero: %rax is non-zero iff any of the 64 bytes
   is NUL.  %xmm1 holds the raw block that precedes %xmm2.  */
1572 .p2align 4
1573L(Shl11LoopStart):
1574 movaps 5(%rcx), %xmm2
1575 movaps 21(%rcx), %xmm3
1576 movaps %xmm3, %xmm6
1577 movaps 37(%rcx), %xmm4
1578 movaps %xmm4, %xmm7
1579 movaps 53(%rcx), %xmm5
1580 pminub %xmm2, %xmm6
1581 pminub %xmm5, %xmm7
1582 pminub %xmm6, %xmm7
1583 pcmpeqb %xmm0, %xmm7
1584 pmovmskb %xmm7, %rax
1585 movaps %xmm5, %xmm7 /* raw last block; becomes %xmm1 for the next pass */
1586 palignr $11, %xmm4, %xmm5
1587 test %rax, %rax
1588 palignr $11, %xmm3, %xmm4 /* palignr does not modify the flags set by test */
/* NUL somewhere in these 64 bytes: redo them 16 at a time.  %xmm1 and
   %xmm2 still hold the raw blocks L(Shl11Start) expects.  */
1589 jnz L(Shl11Start)
1590# ifdef USE_AS_STRNCPY
1591 sub $64, %r8
1592 jbe L(StrncpyLeave11) /* 64 or fewer budget bytes were left; handler is outside this section */
1593# endif
1594 palignr $11, %xmm2, %xmm3
1595 lea 64(%rcx), %rcx
1596 palignr $11, %xmm1, %xmm2
1597 movaps %xmm7, %xmm1
1598 movaps %xmm5, 48(%rdx)
1599 movaps %xmm4, 32(%rdx)
1600 movaps %xmm3, 16(%rdx)
1601 movaps %xmm2, (%rdx)
1602 lea 64(%rdx), %rdx
1603 jmp L(Shl11LoopStart)
1604
/* NUL found in the block at 5(%rcx).  The 5 source bytes at (%rcx) that
   precede that block have not been written by the aligned stores above,
   so copy 8 bytes starting 3 bytes early to cover them.  %rsi = 5 is the
   distance L(CopyFrom1To16Bytes) adds to both cursors before it finishes
   the copy from the mask in %rax.  */
1605L(Shl11LoopExit):
1606 mov -3(%rcx), %r9
1607 mov $5, %rsi
1608 mov %r9, -3(%rdx)
1609 jmp L(CopyFrom1To16Bytes)
1610
/* L(Shl12): copy path used when the 16-byte-aligned source blocks lie at
   offsets -12, 4, 20, ... from %rcx.  Every vector load and store is a
   movaps, so (%rcx - 12) and (%rdx) are assumed to be 16-byte aligned on
   entry; that is arranged outside this section (TODO confirm).

   Registers as used here:
     %rcx   source cursor; the byte at (%rcx) belongs at (%rdx)
     %rdx   destination cursor
     %xmm0  NUL-search operand, expected all-zero on entry; a compare that
            finds no NUL leaves it all-zero, so it is never re-cleared
     %xmm1, %xmm3  previous raw source block (they alternate)
     %xmm2  current raw source block
     %rax   pmovmskb mask of the NUL bytes in the current block
     %r8    remaining byte budget (USE_AS_STRNCPY builds only)

   Four blocks are processed one at a time; after that the source is
   rounded down to a 64-byte boundary and L(Shl12LoopStart) checks 64 bytes
   per pass.  */
1611 .p2align 4
1612L(Shl12):
1613 movaps -12(%rcx), %xmm1
1614 movaps 4(%rcx), %xmm2
/* Also entered from L(Shl12LoopStart) with %xmm1/%xmm2 already loaded.  */
1615L(Shl12Start):
1616 pcmpeqb %xmm2, %xmm0 /* 0xff in every lane where %xmm2 holds a NUL */
1617 pmovmskb %xmm0, %rax
1618 movaps %xmm2, %xmm3 /* keep the raw block; palignr overwrites %xmm2 */
1619# ifdef USE_AS_STRNCPY
1620 sub $16, %r8
1621 jbe L(StrncpyExit12Case2OrCase3) /* 16 or fewer budget bytes were left */
1622# endif
1623 test %rax, %rax
1624 jnz L(Shl12LoopExit) /* NUL found in this block */
1625
/* %xmm2 = top 4 bytes of %xmm1 followed by the low 12 bytes of %xmm2,
   i.e. the 16 source bytes that start at (%rcx).  */
1626 palignr $12, %xmm1, %xmm2
1627 movaps %xmm2, (%rdx)
1628 movaps 20(%rcx), %xmm2
1629
/* Second block: same steps, with %xmm3 as the previous raw block.  */
1630 pcmpeqb %xmm2, %xmm0
1631 lea 16(%rdx), %rdx
1632 pmovmskb %xmm0, %rax
1633 lea 16(%rcx), %rcx
1634 movaps %xmm2, %xmm1
1635# ifdef USE_AS_STRNCPY
1636 sub $16, %r8
1637 jbe L(StrncpyExit12Case2OrCase3)
1638# endif
1639 test %rax, %rax
1640 jnz L(Shl12LoopExit)
1641
1642 palignr $12, %xmm3, %xmm2
1643 movaps %xmm2, (%rdx)
1644 movaps 20(%rcx), %xmm2
1645
/* Third block.  */
1646 pcmpeqb %xmm2, %xmm0
1647 lea 16(%rdx), %rdx
1648 pmovmskb %xmm0, %rax
1649 lea 16(%rcx), %rcx
1650 movaps %xmm2, %xmm3
1651# ifdef USE_AS_STRNCPY
1652 sub $16, %r8
1653 jbe L(StrncpyExit12Case2OrCase3)
1654# endif
1655 test %rax, %rax
1656 jnz L(Shl12LoopExit)
1657
1658 palignr $12, %xmm1, %xmm2
1659 movaps %xmm2, (%rdx)
1660 movaps 20(%rcx), %xmm2
1661
/* Fourth block.  No raw copy is kept: %xmm1 is reloaded before the loop.  */
1662 pcmpeqb %xmm2, %xmm0
1663 lea 16(%rdx), %rdx
1664 pmovmskb %xmm0, %rax
1665 lea 16(%rcx), %rcx
1666# ifdef USE_AS_STRNCPY
1667 sub $16, %r8
1668 jbe L(StrncpyExit12Case2OrCase3)
1669# endif
1670 test %rax, %rax
1671 jnz L(Shl12LoopExit)
1672
1673 palignr $12, %xmm3, %xmm2
1674 movaps %xmm2, (%rdx)
1675 lea 20(%rcx), %rcx
1676 lea 16(%rdx), %rdx
1677
/* Round the next source block address down to a 64-byte boundary.  %rax
   is the number of bytes backed up (0, 16, 32 or 48 given the alignment
   assumed above); %rdx moves back by the same amount and, for strncpy,
   %r8 grows by it, so bytes already stored are simply copied again.  */
1678 mov %rcx, %rax
1679 and $-0x40, %rcx
1680 sub %rcx, %rax
1681 lea -4(%rcx), %rcx /* restore the bias: 4(%rcx) is the 64-byte-aligned block */
1682 sub %rax, %rdx
1683# ifdef USE_AS_STRNCPY
1684 add %rax, %r8
1685# endif
1686 movaps -12(%rcx), %xmm1
1687
1688/* 64 bytes loop */
/* Load four aligned blocks, fold them with a bytewise unsigned minimum and
   compare the result with zero: %rax is non-zero iff any of the 64 bytes
   is NUL.  %xmm1 holds the raw block that precedes %xmm2.  */
1689 .p2align 4
1690L(Shl12LoopStart):
1691 movaps 4(%rcx), %xmm2
1692 movaps 20(%rcx), %xmm3
1693 movaps %xmm3, %xmm6
1694 movaps 36(%rcx), %xmm4
1695 movaps %xmm4, %xmm7
1696 movaps 52(%rcx), %xmm5
1697 pminub %xmm2, %xmm6
1698 pminub %xmm5, %xmm7
1699 pminub %xmm6, %xmm7
1700 pcmpeqb %xmm0, %xmm7
1701 pmovmskb %xmm7, %rax
1702 movaps %xmm5, %xmm7 /* raw last block; becomes %xmm1 for the next pass */
1703 palignr $12, %xmm4, %xmm5
1704 test %rax, %rax
1705 palignr $12, %xmm3, %xmm4 /* palignr does not modify the flags set by test */
/* NUL somewhere in these 64 bytes: redo them 16 at a time.  %xmm1 and
   %xmm2 still hold the raw blocks L(Shl12Start) expects.  */
1706 jnz L(Shl12Start)
1707# ifdef USE_AS_STRNCPY
1708 sub $64, %r8
1709 jbe L(StrncpyLeave12) /* 64 or fewer budget bytes were left; handler is outside this section */
1710# endif
1711 palignr $12, %xmm2, %xmm3
1712 lea 64(%rcx), %rcx
1713 palignr $12, %xmm1, %xmm2
1714 movaps %xmm7, %xmm1
1715 movaps %xmm5, 48(%rdx)
1716 movaps %xmm4, 32(%rdx)
1717 movaps %xmm3, 16(%rdx)
1718 movaps %xmm2, (%rdx)
1719 lea 64(%rdx), %rdx
1720 jmp L(Shl12LoopStart)
1721
/* NUL found in the block at 4(%rcx).  The 4 source bytes at (%rcx) that
   precede that block have not been written by the aligned stores above;
   one 4-byte move copies exactly those.  %rsi = 4 is the distance
   L(CopyFrom1To16Bytes) adds to both cursors before it finishes the copy
   from the mask in %rax.  */
1722L(Shl12LoopExit):
1723 mov (%rcx), %r9d
1724 mov $4, %rsi
1725 mov %r9d, (%rdx)
1726 jmp L(CopyFrom1To16Bytes)
1727
/* L(Shl13): copy path used when the 16-byte-aligned source blocks lie at
   offsets -13, 3, 19, ... from %rcx.  Every vector load and store is a
   movaps, so (%rcx - 13) and (%rdx) are assumed to be 16-byte aligned on
   entry; that is arranged outside this section (TODO confirm).

   Registers as used here:
     %rcx   source cursor; the byte at (%rcx) belongs at (%rdx)
     %rdx   destination cursor
     %xmm0  NUL-search operand, expected all-zero on entry; a compare that
            finds no NUL leaves it all-zero, so it is never re-cleared
     %xmm1, %xmm3  previous raw source block (they alternate)
     %xmm2  current raw source block
     %rax   pmovmskb mask of the NUL bytes in the current block
     %r8    remaining byte budget (USE_AS_STRNCPY builds only)

   Four blocks are processed one at a time; after that the source is
   rounded down to a 64-byte boundary and L(Shl13LoopStart) checks 64 bytes
   per pass.  */
1728 .p2align 4
1729L(Shl13):
1730 movaps -13(%rcx), %xmm1
1731 movaps 3(%rcx), %xmm2
/* Also entered from L(Shl13LoopStart) with %xmm1/%xmm2 already loaded.  */
1732L(Shl13Start):
1733 pcmpeqb %xmm2, %xmm0 /* 0xff in every lane where %xmm2 holds a NUL */
1734 pmovmskb %xmm0, %rax
1735 movaps %xmm2, %xmm3 /* keep the raw block; palignr overwrites %xmm2 */
1736# ifdef USE_AS_STRNCPY
1737 sub $16, %r8
1738 jbe L(StrncpyExit13Case2OrCase3) /* 16 or fewer budget bytes were left */
1739# endif
1740 test %rax, %rax
1741 jnz L(Shl13LoopExit) /* NUL found in this block */
1742
/* %xmm2 = top 3 bytes of %xmm1 followed by the low 13 bytes of %xmm2,
   i.e. the 16 source bytes that start at (%rcx).  */
1743 palignr $13, %xmm1, %xmm2
1744 movaps %xmm2, (%rdx)
1745 movaps 19(%rcx), %xmm2
1746
/* Second block: same steps, with %xmm3 as the previous raw block.  */
1747 pcmpeqb %xmm2, %xmm0
1748 lea 16(%rdx), %rdx
1749 pmovmskb %xmm0, %rax
1750 lea 16(%rcx), %rcx
1751 movaps %xmm2, %xmm1
1752# ifdef USE_AS_STRNCPY
1753 sub $16, %r8
1754 jbe L(StrncpyExit13Case2OrCase3)
1755# endif
1756 test %rax, %rax
1757 jnz L(Shl13LoopExit)
1758
1759 palignr $13, %xmm3, %xmm2
1760 movaps %xmm2, (%rdx)
1761 movaps 19(%rcx), %xmm2
1762
/* Third block.  */
1763 pcmpeqb %xmm2, %xmm0
1764 lea 16(%rdx), %rdx
1765 pmovmskb %xmm0, %rax
1766 lea 16(%rcx), %rcx
1767 movaps %xmm2, %xmm3
1768# ifdef USE_AS_STRNCPY
1769 sub $16, %r8
1770 jbe L(StrncpyExit13Case2OrCase3)
1771# endif
1772 test %rax, %rax
1773 jnz L(Shl13LoopExit)
1774
1775 palignr $13, %xmm1, %xmm2
1776 movaps %xmm2, (%rdx)
1777 movaps 19(%rcx), %xmm2
1778
/* Fourth block.  No raw copy is kept: %xmm1 is reloaded before the loop.  */
1779 pcmpeqb %xmm2, %xmm0
1780 lea 16(%rdx), %rdx
1781 pmovmskb %xmm0, %rax
1782 lea 16(%rcx), %rcx
1783# ifdef USE_AS_STRNCPY
1784 sub $16, %r8
1785 jbe L(StrncpyExit13Case2OrCase3)
1786# endif
1787 test %rax, %rax
1788 jnz L(Shl13LoopExit)
1789
1790 palignr $13, %xmm3, %xmm2
1791 movaps %xmm2, (%rdx)
1792 lea 19(%rcx), %rcx
1793 lea 16(%rdx), %rdx
1794
/* Round the next source block address down to a 64-byte boundary.  %rax
   is the number of bytes backed up (0, 16, 32 or 48 given the alignment
   assumed above); %rdx moves back by the same amount and, for strncpy,
   %r8 grows by it, so bytes already stored are simply copied again.  */
1795 mov %rcx, %rax
1796 and $-0x40, %rcx
1797 sub %rcx, %rax
1798 lea -3(%rcx), %rcx /* restore the bias: 3(%rcx) is the 64-byte-aligned block */
1799 sub %rax, %rdx
1800# ifdef USE_AS_STRNCPY
1801 add %rax, %r8
1802# endif
1803 movaps -13(%rcx), %xmm1
1804
1805/* 64 bytes loop */
/* Load four aligned blocks, fold them with a bytewise unsigned minimum and
   compare the result with zero: %rax is non-zero iff any of the 64 bytes
   is NUL.  %xmm1 holds the raw block that precedes %xmm2.  */
1806 .p2align 4
1807L(Shl13LoopStart):
1808 movaps 3(%rcx), %xmm2
1809 movaps 19(%rcx), %xmm3
1810 movaps %xmm3, %xmm6
1811 movaps 35(%rcx), %xmm4
1812 movaps %xmm4, %xmm7
1813 movaps 51(%rcx), %xmm5
1814 pminub %xmm2, %xmm6
1815 pminub %xmm5, %xmm7
1816 pminub %xmm6, %xmm7
1817 pcmpeqb %xmm0, %xmm7
1818 pmovmskb %xmm7, %rax
1819 movaps %xmm5, %xmm7 /* raw last block; becomes %xmm1 for the next pass */
1820 palignr $13, %xmm4, %xmm5
1821 test %rax, %rax
1822 palignr $13, %xmm3, %xmm4 /* palignr does not modify the flags set by test */
/* NUL somewhere in these 64 bytes: redo them 16 at a time.  %xmm1 and
   %xmm2 still hold the raw blocks L(Shl13Start) expects.  */
1823 jnz L(Shl13Start)
1824# ifdef USE_AS_STRNCPY
1825 sub $64, %r8
1826 jbe L(StrncpyLeave13) /* 64 or fewer budget bytes were left; handler is outside this section */
1827# endif
1828 palignr $13, %xmm2, %xmm3
1829 lea 64(%rcx), %rcx
1830 palignr $13, %xmm1, %xmm2
1831 movaps %xmm7, %xmm1
1832 movaps %xmm5, 48(%rdx)
1833 movaps %xmm4, 32(%rdx)
1834 movaps %xmm3, 16(%rdx)
1835 movaps %xmm2, (%rdx)
1836 lea 64(%rdx), %rdx
1837 jmp L(Shl13LoopStart)
1838
/* NUL found in the block at 3(%rcx).  The 3 source bytes at (%rcx) that
   precede that block have not been written by the aligned stores above,
   so copy 4 bytes starting 1 byte early to cover them.  %rsi = 3 is the
   distance L(CopyFrom1To16Bytes) adds to both cursors before it finishes
   the copy from the mask in %rax.  */
1839L(Shl13LoopExit):
1840 mov -1(%rcx), %r9d
1841 mov $3, %rsi
1842 mov %r9d, -1(%rdx)
1843 jmp L(CopyFrom1To16Bytes)
1844
/* L(Shl14): copy path used when the 16-byte-aligned source blocks lie at
   offsets -14, 2, 18, ... from %rcx.  Every vector load and store is a
   movaps, so (%rcx - 14) and (%rdx) are assumed to be 16-byte aligned on
   entry; that is arranged outside this section (TODO confirm).

   Registers as used here:
     %rcx   source cursor; the byte at (%rcx) belongs at (%rdx)
     %rdx   destination cursor
     %xmm0  NUL-search operand, expected all-zero on entry; a compare that
            finds no NUL leaves it all-zero, so it is never re-cleared
     %xmm1, %xmm3  previous raw source block (they alternate)
     %xmm2  current raw source block
     %rax   pmovmskb mask of the NUL bytes in the current block
     %r8    remaining byte budget (USE_AS_STRNCPY builds only)

   Four blocks are processed one at a time; after that the source is
   rounded down to a 64-byte boundary and L(Shl14LoopStart) checks 64 bytes
   per pass.  */
1845 .p2align 4
1846L(Shl14):
1847 movaps -14(%rcx), %xmm1
1848 movaps 2(%rcx), %xmm2
/* Also entered from L(Shl14LoopStart) with %xmm1/%xmm2 already loaded.  */
1849L(Shl14Start):
1850 pcmpeqb %xmm2, %xmm0 /* 0xff in every lane where %xmm2 holds a NUL */
1851 pmovmskb %xmm0, %rax
1852 movaps %xmm2, %xmm3 /* keep the raw block; palignr overwrites %xmm2 */
1853# ifdef USE_AS_STRNCPY
1854 sub $16, %r8
1855 jbe L(StrncpyExit14Case2OrCase3) /* 16 or fewer budget bytes were left */
1856# endif
1857 test %rax, %rax
1858 jnz L(Shl14LoopExit) /* NUL found in this block */
1859
/* %xmm2 = top 2 bytes of %xmm1 followed by the low 14 bytes of %xmm2,
   i.e. the 16 source bytes that start at (%rcx).  */
1860 palignr $14, %xmm1, %xmm2
1861 movaps %xmm2, (%rdx)
1862 movaps 18(%rcx), %xmm2
1863
/* Second block: same steps, with %xmm3 as the previous raw block.  */
1864 pcmpeqb %xmm2, %xmm0
1865 lea 16(%rdx), %rdx
1866 pmovmskb %xmm0, %rax
1867 lea 16(%rcx), %rcx
1868 movaps %xmm2, %xmm1
1869# ifdef USE_AS_STRNCPY
1870 sub $16, %r8
1871 jbe L(StrncpyExit14Case2OrCase3)
1872# endif
1873 test %rax, %rax
1874 jnz L(Shl14LoopExit)
1875
1876 palignr $14, %xmm3, %xmm2
1877 movaps %xmm2, (%rdx)
1878 movaps 18(%rcx), %xmm2
1879
/* Third block.  */
1880 pcmpeqb %xmm2, %xmm0
1881 lea 16(%rdx), %rdx
1882 pmovmskb %xmm0, %rax
1883 lea 16(%rcx), %rcx
1884 movaps %xmm2, %xmm3
1885# ifdef USE_AS_STRNCPY
1886 sub $16, %r8
1887 jbe L(StrncpyExit14Case2OrCase3)
1888# endif
1889 test %rax, %rax
1890 jnz L(Shl14LoopExit)
1891
1892 palignr $14, %xmm1, %xmm2
1893 movaps %xmm2, (%rdx)
1894 movaps 18(%rcx), %xmm2
1895
/* Fourth block.  No raw copy is kept: %xmm1 is reloaded before the loop.  */
1896 pcmpeqb %xmm2, %xmm0
1897 lea 16(%rdx), %rdx
1898 pmovmskb %xmm0, %rax
1899 lea 16(%rcx), %rcx
1900# ifdef USE_AS_STRNCPY
1901 sub $16, %r8
1902 jbe L(StrncpyExit14Case2OrCase3)
1903# endif
1904 test %rax, %rax
1905 jnz L(Shl14LoopExit)
1906
1907 palignr $14, %xmm3, %xmm2
1908 movaps %xmm2, (%rdx)
1909 lea 18(%rcx), %rcx
1910 lea 16(%rdx), %rdx
1911
/* Round the next source block address down to a 64-byte boundary.  %rax
   is the number of bytes backed up (0, 16, 32 or 48 given the alignment
   assumed above); %rdx moves back by the same amount and, for strncpy,
   %r8 grows by it, so bytes already stored are simply copied again.  */
1912 mov %rcx, %rax
1913 and $-0x40, %rcx
1914 sub %rcx, %rax
1915 lea -2(%rcx), %rcx /* restore the bias: 2(%rcx) is the 64-byte-aligned block */
1916 sub %rax, %rdx
1917# ifdef USE_AS_STRNCPY
1918 add %rax, %r8
1919# endif
1920 movaps -14(%rcx), %xmm1
1921
1922/* 64 bytes loop */
/* Load four aligned blocks, fold them with a bytewise unsigned minimum and
   compare the result with zero: %rax is non-zero iff any of the 64 bytes
   is NUL.  %xmm1 holds the raw block that precedes %xmm2.  */
1923 .p2align 4
1924L(Shl14LoopStart):
1925 movaps 2(%rcx), %xmm2
1926 movaps 18(%rcx), %xmm3
1927 movaps %xmm3, %xmm6
1928 movaps 34(%rcx), %xmm4
1929 movaps %xmm4, %xmm7
1930 movaps 50(%rcx), %xmm5
1931 pminub %xmm2, %xmm6
1932 pminub %xmm5, %xmm7
1933 pminub %xmm6, %xmm7
1934 pcmpeqb %xmm0, %xmm7
1935 pmovmskb %xmm7, %rax
1936 movaps %xmm5, %xmm7 /* raw last block; becomes %xmm1 for the next pass */
1937 palignr $14, %xmm4, %xmm5
1938 test %rax, %rax
1939 palignr $14, %xmm3, %xmm4 /* palignr does not modify the flags set by test */
/* NUL somewhere in these 64 bytes: redo them 16 at a time.  %xmm1 and
   %xmm2 still hold the raw blocks L(Shl14Start) expects.  */
1940 jnz L(Shl14Start)
1941# ifdef USE_AS_STRNCPY
1942 sub $64, %r8
1943 jbe L(StrncpyLeave14) /* 64 or fewer budget bytes were left; handler is outside this section */
1944# endif
1945 palignr $14, %xmm2, %xmm3
1946 lea 64(%rcx), %rcx
1947 palignr $14, %xmm1, %xmm2
1948 movaps %xmm7, %xmm1
1949 movaps %xmm5, 48(%rdx)
1950 movaps %xmm4, 32(%rdx)
1951 movaps %xmm3, 16(%rdx)
1952 movaps %xmm2, (%rdx)
1953 lea 64(%rdx), %rdx
1954 jmp L(Shl14LoopStart)
1955
/* NUL found in the block at 2(%rcx).  The 2 source bytes at (%rcx) that
   precede that block have not been written by the aligned stores above,
   so copy 4 bytes starting 2 bytes early to cover them.  %rsi = 2 is the
   distance L(CopyFrom1To16Bytes) adds to both cursors before it finishes
   the copy from the mask in %rax.  */
1956L(Shl14LoopExit):
1957 mov -2(%rcx), %r9d
1958 mov $2, %rsi
1959 mov %r9d, -2(%rdx)
1960 jmp L(CopyFrom1To16Bytes)
1961
/* L(Shl15): copy path used when the 16-byte-aligned source blocks lie at
   offsets -15, 1, 17, ... from %rcx.  Every vector load and store is a
   movaps, so (%rcx - 15) and (%rdx) are assumed to be 16-byte aligned on
   entry; that is arranged outside this section (TODO confirm).

   Registers as used here:
     %rcx   source cursor; the byte at (%rcx) belongs at (%rdx)
     %rdx   destination cursor
     %xmm0  NUL-search operand, expected all-zero on entry; a compare that
            finds no NUL leaves it all-zero, so it is never re-cleared
     %xmm1, %xmm3  previous raw source block (they alternate)
     %xmm2  current raw source block
     %rax   pmovmskb mask of the NUL bytes in the current block
     %r8    remaining byte budget (USE_AS_STRNCPY builds only)

   Four blocks are processed one at a time; after that the source is
   rounded down to a 64-byte boundary and L(Shl15LoopStart) checks 64 bytes
   per pass.  */
1962 .p2align 4
1963L(Shl15):
1964 movaps -15(%rcx), %xmm1
1965 movaps 1(%rcx), %xmm2
/* Also entered from L(Shl15LoopStart) with %xmm1/%xmm2 already loaded.  */
1966L(Shl15Start):
1967 pcmpeqb %xmm2, %xmm0 /* 0xff in every lane where %xmm2 holds a NUL */
1968 pmovmskb %xmm0, %rax
1969 movaps %xmm2, %xmm3 /* keep the raw block; palignr overwrites %xmm2 */
1970# ifdef USE_AS_STRNCPY
1971 sub $16, %r8
1972 jbe L(StrncpyExit15Case2OrCase3) /* 16 or fewer budget bytes were left */
1973# endif
1974 test %rax, %rax
1975 jnz L(Shl15LoopExit) /* NUL found in this block */
1976
/* %xmm2 = top byte of %xmm1 followed by the low 15 bytes of %xmm2,
   i.e. the 16 source bytes that start at (%rcx).  */
1977 palignr $15, %xmm1, %xmm2
1978 movaps %xmm2, (%rdx)
1979 movaps 17(%rcx), %xmm2
1980
/* Second block: same steps, with %xmm3 as the previous raw block.  */
1981 pcmpeqb %xmm2, %xmm0
1982 lea 16(%rdx), %rdx
1983 pmovmskb %xmm0, %rax
1984 lea 16(%rcx), %rcx
1985 movaps %xmm2, %xmm1
1986# ifdef USE_AS_STRNCPY
1987 sub $16, %r8
1988 jbe L(StrncpyExit15Case2OrCase3)
1989# endif
1990 test %rax, %rax
1991 jnz L(Shl15LoopExit)
1992
1993 palignr $15, %xmm3, %xmm2
1994 movaps %xmm2, (%rdx)
1995 movaps 17(%rcx), %xmm2
1996
/* Third block.  */
1997 pcmpeqb %xmm2, %xmm0
1998 lea 16(%rdx), %rdx
1999 pmovmskb %xmm0, %rax
2000 lea 16(%rcx), %rcx
2001 movaps %xmm2, %xmm3
2002# ifdef USE_AS_STRNCPY
2003 sub $16, %r8
2004 jbe L(StrncpyExit15Case2OrCase3)
2005# endif
2006 test %rax, %rax
2007 jnz L(Shl15LoopExit)
2008
2009 palignr $15, %xmm1, %xmm2
2010 movaps %xmm2, (%rdx)
2011 movaps 17(%rcx), %xmm2
2012
/* Fourth block.  No raw copy is kept: %xmm1 is reloaded before the loop.  */
2013 pcmpeqb %xmm2, %xmm0
2014 lea 16(%rdx), %rdx
2015 pmovmskb %xmm0, %rax
2016 lea 16(%rcx), %rcx
2017# ifdef USE_AS_STRNCPY
2018 sub $16, %r8
2019 jbe L(StrncpyExit15Case2OrCase3)
2020# endif
2021 test %rax, %rax
2022 jnz L(Shl15LoopExit)
2023
2024 palignr $15, %xmm3, %xmm2
2025 movaps %xmm2, (%rdx)
2026 lea 17(%rcx), %rcx
2027 lea 16(%rdx), %rdx
2028
/* Round the next source block address down to a 64-byte boundary.  %rax
   is the number of bytes backed up (0, 16, 32 or 48 given the alignment
   assumed above); %rdx moves back by the same amount and, for strncpy,
   %r8 grows by it, so bytes already stored are simply copied again.  */
2029 mov %rcx, %rax
2030 and $-0x40, %rcx
2031 sub %rcx, %rax
2032 lea -1(%rcx), %rcx /* restore the bias: 1(%rcx) is the 64-byte-aligned block */
2033 sub %rax, %rdx
2034# ifdef USE_AS_STRNCPY
2035 add %rax, %r8
2036# endif
2037 movaps -15(%rcx), %xmm1
2038
2039/* 64 bytes loop */
/* Load four aligned blocks, fold them with a bytewise unsigned minimum and
   compare the result with zero: %rax is non-zero iff any of the 64 bytes
   is NUL.  %xmm1 holds the raw block that precedes %xmm2.  */
2040 .p2align 4
2041L(Shl15LoopStart):
2042 movaps 1(%rcx), %xmm2
2043 movaps 17(%rcx), %xmm3
2044 movaps %xmm3, %xmm6
2045 movaps 33(%rcx), %xmm4
2046 movaps %xmm4, %xmm7
2047 movaps 49(%rcx), %xmm5
2048 pminub %xmm2, %xmm6
2049 pminub %xmm5, %xmm7
2050 pminub %xmm6, %xmm7
2051 pcmpeqb %xmm0, %xmm7
2052 pmovmskb %xmm7, %rax
2053 movaps %xmm5, %xmm7 /* raw last block; becomes %xmm1 for the next pass */
2054 palignr $15, %xmm4, %xmm5
2055 test %rax, %rax
2056 palignr $15, %xmm3, %xmm4 /* palignr does not modify the flags set by test */
/* NUL somewhere in these 64 bytes: redo them 16 at a time.  %xmm1 and
   %xmm2 still hold the raw blocks L(Shl15Start) expects.  */
2057 jnz L(Shl15Start)
2058# ifdef USE_AS_STRNCPY
2059 sub $64, %r8
2060 jbe L(StrncpyLeave15) /* 64 or fewer budget bytes were left; handler is outside this section */
2061# endif
2062 palignr $15, %xmm2, %xmm3
2063 lea 64(%rcx), %rcx
2064 palignr $15, %xmm1, %xmm2
2065 movaps %xmm7, %xmm1
2066 movaps %xmm5, 48(%rdx)
2067 movaps %xmm4, 32(%rdx)
2068 movaps %xmm3, 16(%rdx)
2069 movaps %xmm2, (%rdx)
2070 lea 64(%rdx), %rdx
2071 jmp L(Shl15LoopStart)
2072
/* NUL found in the block at 1(%rcx).  The single source byte at (%rcx)
   that precedes that block has not been written by the aligned stores
   above, so copy 4 bytes starting 3 bytes early to cover it.  %rsi = 1 is
   the distance L(CopyFrom1To16Bytes) adds to both cursors.

   Only USE_AS_STRCAT builds need the explicit jump.  In the other builds
   L(CopyFrom1To16Bytes) is the next code in the file, so control falls
   through into it.  */
2073L(Shl15LoopExit):
2074 mov -3(%rcx), %r9d
2075 mov $1, %rsi
2076 mov %r9d, -3(%rdx)
2077# ifdef USE_AS_STRCAT
2078 jmp L(CopyFrom1To16Bytes)
2079# endif
2080
2081# ifndef USE_AS_STRCAT
2082
/* L(CopyFrom1To16Bytes): finish a copy whose terminating NUL lies in the
   16-byte block that starts %rsi bytes past the cursors.
   In:  %rax = pmovmskb NUL mask of that block (callers jump here only
               when it is non-zero), %rsi = cursor adjustment,
        %rcx/%rdx = source/destination cursors,
        %r8 (USE_AS_STRNCPY) = budget with 16 already subtracted.
   Both cursors are advanced by %rsi, the 16 is added back to %r8, and the
   lowest set mask bit k selects L(Exit<k+1>).  If the low mask byte is
   non-zero but bits 0..6 are clear, control falls through into L(Exit8),
   which follows this code.  */
2083 .p2align 4
2084L(CopyFrom1To16Bytes):
2085# ifdef USE_AS_STRNCPY
2086 add $16, %r8
2087# endif
2088 add %rsi, %rdx
2089 add %rsi, %rcx
2090
2091 test %al, %al
2092 jz L(ExitHigh) /* NUL is in bytes 8..15 */
2093 test $0x01, %al
2094 jnz L(Exit1)
2095 test $0x02, %al
2096 jnz L(Exit2)
2097 test $0x04, %al
2098 jnz L(Exit3)
2099 test $0x08, %al
2100 jnz L(Exit4)
2101 test $0x10, %al
2102 jnz L(Exit5)
2103 test $0x20, %al
2104 jnz L(Exit6)
2105 test $0x40, %al
2106 jnz L(Exit7)
2107
/* L(Exit8): copy exactly 8 bytes from (%rcx) to (%rdx) with one 8-byte
   move and return.  %rax returns %rdi, or under USE_AS_STPCPY the address
   of the last byte copied.  USE_AS_STRNCPY: 8 is subtracted from %r8; if
   budget remains, %rcx is pointed just past the copy and
   L(StrncpyFillTailWithZero1) continues (lea leaves the flags from sub
   intact for the jnz).  When the budget is used up exactly, cmpb/sbb add
   1 to the stpcpy result unless the last byte copied is NUL.  */
2108 .p2align 4
2109L(Exit8):
2110 mov (%rcx), %rax
2111 mov %rax, (%rdx)
2112# ifdef USE_AS_STPCPY
2113 lea 7(%rdx), %rax
2114# else
2115 mov %rdi, %rax
2116# endif
2117# ifdef USE_AS_STRNCPY
2118 sub $8, %r8
2119 lea 8(%rdx), %rcx
2120 jnz L(StrncpyFillTailWithZero1)
2121# ifdef USE_AS_STPCPY
2122 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2123 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2124# endif
2125# endif
2126 ret
2127
/* L(ExitHigh): continuation of L(CopyFrom1To16Bytes) for a NUL in bytes
   8..15 of the block.  %ah holds mask bits 8..15; the lowest set bit k
   selects L(Exit<9+k>).  If bits 0..6 of %ah are all clear, control falls
   through into L(Exit16), which follows this code.  */
2128 .p2align 4
2129L(ExitHigh):
2130 test $0x01, %ah
2131 jnz L(Exit9)
2132 test $0x02, %ah
2133 jnz L(Exit10)
2134 test $0x04, %ah
2135 jnz L(Exit11)
2136 test $0x08, %ah
2137 jnz L(Exit12)
2138 test $0x10, %ah
2139 jnz L(Exit13)
2140 test $0x20, %ah
2141 jnz L(Exit14)
2142 test $0x40, %ah
2143 jnz L(Exit15)
2144
/* L(Exit16): copy exactly 16 bytes from (%rcx) to (%rdx) with two 8-byte
   moves and return.  %rax returns %rdi, or under USE_AS_STPCPY the
   address of the last byte copied.  USE_AS_STRNCPY: 16 is subtracted from
   %r8; if budget remains, %rcx is pointed just past the copy and
   L(StrncpyFillTailWithZero1) continues (lea leaves the flags from sub
   intact for the jnz).  When the budget is used up exactly, cmpb/sbb add
   1 to the stpcpy result unless the last byte copied is NUL.  */
2145 .p2align 4
2146L(Exit16):
2147 mov (%rcx), %rax
2148 mov %rax, (%rdx)
2149 mov 8(%rcx), %rax
2150 mov %rax, 8(%rdx)
2151# ifdef USE_AS_STPCPY
2152 lea 15(%rdx), %rax
2153# else
2154 mov %rdi, %rax
2155# endif
2156# ifdef USE_AS_STRNCPY
2157 sub $16, %r8
2158 lea 16(%rdx), %rcx
2159 jnz L(StrncpyFillTailWithZero1)
2160# ifdef USE_AS_STPCPY
2161 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2162 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2163# endif
2164# endif
2165 ret
2166
2167# ifdef USE_AS_STRNCPY
2168
/* L(CopyFrom1To16BytesCase2) (USE_AS_STRNCPY only): the current block
   holds a NUL (%rax != 0) and the byte budget also runs out within it.
   After the 16 is added back, %r8 is the number of bytes that may still
   be copied (1..16 expected -- TODO confirm against callers).  Cursors are
   advanced by %rsi, then the code walks k = 1, 2, ... and leaves through
   L(Exit<k>) as soon as either the budget equals k or mask bit k-1 is
   set, i.e. whichever limit comes first.  */
2169 .p2align 4
2170L(CopyFrom1To16BytesCase2):
2171 add $16, %r8
2172 add %rsi, %rcx
2173 lea (%rsi, %rdx), %rsi /* park the advanced destination in %rsi */
/* Use %rdx as scratch: %dh becomes non-zero when %r8 < 9 (bit 15 of
   %r8 - 9 is set) or when the low mask byte %al is non-zero.  */
2174 lea -9(%r8), %rdx
2175 and $1<<7, %dh
2176 or %al, %dh
2177 test %dh, %dh
2178 lea (%rsi), %rdx /* restore the destination; lea keeps the flags from test */
2179 jz L(ExitHighCase2) /* budget >= 9 and no NUL in bytes 0..7 */
2180
2181 cmp $1, %r8
2182 je L(Exit1)
2183 test $0x01, %al
2184 jnz L(Exit1)
2185 cmp $2, %r8
2186 je L(Exit2)
2187 test $0x02, %al
2188 jnz L(Exit2)
2189 cmp $3, %r8
2190 je L(Exit3)
2191 test $0x04, %al
2192 jnz L(Exit3)
2193 cmp $4, %r8
2194 je L(Exit4)
2195 test $0x08, %al
2196 jnz L(Exit4)
2197 cmp $5, %r8
2198 je L(Exit5)
2199 test $0x10, %al
2200 jnz L(Exit5)
2201 cmp $6, %r8
2202 je L(Exit6)
2203 test $0x20, %al
2204 jnz L(Exit6)
2205 cmp $7, %r8
2206 je L(Exit7)
2207 test $0x40, %al
2208 jnz L(Exit7)
2209 jmp L(Exit8)
2210
/* L(ExitHighCase2): upper half of L(CopyFrom1To16BytesCase2).  For
   k = 9..15, leave through L(Exit<k>) when the budget in %r8 equals k or
   when mask bit k-1 (bit k-9 of %ah) is set; otherwise copy all 16 bytes
   via L(Exit16).  */
2211 .p2align 4
2212L(ExitHighCase2):
2213 cmp $9, %r8
2214 je L(Exit9)
2215 test $0x01, %ah
2216 jnz L(Exit9)
2217 cmp $10, %r8
2218 je L(Exit10)
2219 test $0x02, %ah
2220 jnz L(Exit10)
2221 cmp $11, %r8
2222 je L(Exit11)
2223 test $0x04, %ah
2224 jnz L(Exit11)
2225 cmp $12, %r8
2226 je L(Exit12)
2227 test $0x8, %ah
2228 jnz L(Exit12)
2229 cmp $13, %r8
2230 je L(Exit13)
2231 test $0x10, %ah
2232 jnz L(Exit13)
2233 cmp $14, %r8
2234 je L(Exit14)
2235 test $0x20, %ah
2236 jnz L(Exit14)
2237 cmp $15, %r8
2238 je L(Exit15)
2239 test $0x40, %ah
2240 jnz L(Exit15)
2241 jmp L(Exit16)
2242
/* Budget runs out in the current block: take Case2 when the block also
   contains a NUL (%rax != 0); otherwise fall through into
   L(CopyFrom1To16BytesCase3), which follows this code.  */
2243L(CopyFrom1To16BytesCase2OrCase3):
2244 test %rax, %rax
2245 jnz L(CopyFrom1To16BytesCase2)
2246
/* L(CopyFrom1To16BytesCase3) (USE_AS_STRNCPY only): the byte budget runs
   out in the current block and no NUL was seen in it.  After the 16 is
   added back, %r8 is the number of bytes still to copy; cursors are
   advanced by %rsi and a compare tree jumps to L(Exit<%r8>).  The tree
   uses signed conditions (jl/jg) and only covers 1..16, so %r8 is
   expected to be in that range here (TODO confirm against callers).  */
2247 .p2align 4
2248L(CopyFrom1To16BytesCase3):
2249 add $16, %r8
2250 add %rsi, %rdx
2251 add %rsi, %rcx
2252
2253 cmp $16, %r8
2254 je L(Exit16)
2255 cmp $8, %r8
2256 je L(Exit8)
2257 jg L(More8Case3)
2258 cmp $4, %r8
2259 je L(Exit4)
2260 jg L(More4Case3)
2261 cmp $2, %r8
2262 jl L(Exit1)
2263 je L(Exit2)
2264 jg L(Exit3)
2265L(More8Case3): /* but less than 16 */
2266 cmp $12, %r8
2267 je L(Exit12)
2268 jl L(Less12Case3)
2269 cmp $14, %r8
2270 jl L(Exit13)
2271 je L(Exit14)
2272 jg L(Exit15)
2273L(More4Case3): /* but less than 8 */
2274 cmp $6, %r8
2275 jl L(Exit5)
2276 je L(Exit6)
2277 jg L(Exit7)
2278L(Less12Case3): /* but more than 8 */
2279 cmp $10, %r8
2280 jl L(Exit9)
2281 je L(Exit10)
2282 jg L(Exit11)
2283# endif
2284
/* L(Exit1): copy exactly 1 byte from (%rcx) to (%rdx) and return.  %rax
   returns %rdi, or under USE_AS_STPCPY the address of the byte copied.
   USE_AS_STRNCPY: 1 is subtracted from %r8; if budget remains, %rcx is
   pointed just past the copy and L(StrncpyFillTailWithZero1) continues
   (lea leaves the flags from sub intact for the jnz).  When the budget is
   used up exactly, cmpb/sbb add 1 to the stpcpy result unless the byte
   copied is NUL.  */
2285 .p2align 4
2286L(Exit1):
2287 movb (%rcx), %al
2288 movb %al, (%rdx)
2289# ifdef USE_AS_STPCPY
2290 lea (%rdx), %rax
2291# else
2292 mov %rdi, %rax
2293# endif
2294# ifdef USE_AS_STRNCPY
2295 sub $1, %r8
2296 lea 1(%rdx), %rcx
2297 jnz L(StrncpyFillTailWithZero1)
2298# ifdef USE_AS_STPCPY
2299 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2300 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2301# endif
2302# endif
2303 ret
2304
/* L(Exit2): copy exactly 2 bytes from (%rcx) to (%rdx) with one 2-byte
   move and return.  %rax returns %rdi, or under USE_AS_STPCPY the address
   of the last byte copied.  USE_AS_STRNCPY: 2 is subtracted from %r8; if
   budget remains, %rcx is pointed just past the copy and
   L(StrncpyFillTailWithZero1) continues (lea leaves the flags from sub
   intact for the jnz).  When the budget is used up exactly, cmpb/sbb add
   1 to the stpcpy result unless the last byte copied is NUL.  */
2305 .p2align 4
2306L(Exit2):
2307 movw (%rcx), %ax
2308 movw %ax, (%rdx)
2309# ifdef USE_AS_STPCPY
2310 lea 1(%rdx), %rax
2311# else
2312 mov %rdi, %rax
2313# endif
2314# ifdef USE_AS_STRNCPY
2315 sub $2, %r8
2316 lea 2(%rdx), %rcx
2317 jnz L(StrncpyFillTailWithZero1)
2318# ifdef USE_AS_STPCPY
2319 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2320 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2321# endif
2322# endif
2323 ret
2324
/* L(Exit3): copy exactly 3 bytes from (%rcx) to (%rdx) (a 2-byte move
   plus the byte at offset 2) and return.  %rax returns %rdi, or under
   USE_AS_STPCPY the address of the last byte copied.  USE_AS_STRNCPY: 3
   is subtracted from %r8; if budget remains, %rcx is pointed just past
   the copy and L(StrncpyFillTailWithZero1) continues (lea leaves the
   flags from sub intact for the jnz).  When the budget is used up
   exactly, cmpb/sbb add 1 to the stpcpy result unless the last byte
   copied is NUL.  */
2325 .p2align 4
2326L(Exit3):
2327 movw (%rcx), %ax
2328 movw %ax, (%rdx)
2329 movb 2(%rcx), %al
2330 movb %al, 2(%rdx)
2331# ifdef USE_AS_STPCPY
2332 lea 2(%rdx), %rax
2333# else
2334 mov %rdi, %rax
2335# endif
2336# ifdef USE_AS_STRNCPY
2337 sub $3, %r8
2338 lea 3(%rdx), %rcx
2339 jnz L(StrncpyFillTailWithZero1)
2340# ifdef USE_AS_STPCPY
2341 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2342 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2343# endif
2344# endif
2345 ret
2346
/* L(Exit4): copy exactly 4 bytes from (%rcx) to (%rdx) with one 4-byte
   move and return.  %rax returns %rdi, or under USE_AS_STPCPY the address
   of the last byte copied.  USE_AS_STRNCPY: 4 is subtracted from %r8; if
   budget remains, %rcx is pointed just past the copy and
   L(StrncpyFillTailWithZero1) continues (lea leaves the flags from sub
   intact for the jnz).  When the budget is used up exactly, cmpb/sbb add
   1 to the stpcpy result unless the last byte copied is NUL.  */
2347 .p2align 4
2348L(Exit4):
2349 movl (%rcx), %eax
2350 movl %eax, (%rdx)
2351# ifdef USE_AS_STPCPY
2352 lea 3(%rdx), %rax
2353# else
2354 mov %rdi, %rax
2355# endif
2356# ifdef USE_AS_STRNCPY
2357 sub $4, %r8
2358 lea 4(%rdx), %rcx
2359 jnz L(StrncpyFillTailWithZero1)
2360# ifdef USE_AS_STPCPY
2361 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2362 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2363# endif
2364# endif
2365 ret
2366
/* L(Exit5): copy exactly 5 bytes from (%rcx) to (%rdx) (a 4-byte move
   plus the byte at offset 4) and return.  %rax returns %rdi, or under
   USE_AS_STPCPY the address of the last byte copied.  USE_AS_STRNCPY: 5
   is subtracted from %r8; if budget remains, %rcx is pointed just past
   the copy and L(StrncpyFillTailWithZero1) continues (lea leaves the
   flags from sub intact for the jnz).  When the budget is used up
   exactly, cmpb/sbb add 1 to the stpcpy result unless the last byte
   copied is NUL.  */
2367 .p2align 4
2368L(Exit5):
2369 movl (%rcx), %eax
2370 movl %eax, (%rdx)
2371 movb 4(%rcx), %al
2372 movb %al, 4(%rdx)
2373# ifdef USE_AS_STPCPY
2374 lea 4(%rdx), %rax
2375# else
2376 mov %rdi, %rax
2377# endif
2378# ifdef USE_AS_STRNCPY
2379 sub $5, %r8
2380 lea 5(%rdx), %rcx
2381 jnz L(StrncpyFillTailWithZero1)
2382# ifdef USE_AS_STPCPY
2383 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2384 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2385# endif
2386# endif
2387 ret
2388
/* L(Exit6): copy exactly 6 bytes from (%rcx) to (%rdx) (a 4-byte move
   plus a 2-byte move at offset 4) and return.  %rax returns %rdi, or
   under USE_AS_STPCPY the address of the last byte copied.
   USE_AS_STRNCPY: 6 is subtracted from %r8; if budget remains, %rcx is
   pointed just past the copy and L(StrncpyFillTailWithZero1) continues
   (lea leaves the flags from sub intact for the jnz).  When the budget is
   used up exactly, cmpb/sbb add 1 to the stpcpy result unless the last
   byte copied is NUL.  */
2389 .p2align 4
2390L(Exit6):
2391 movl (%rcx), %eax
2392 movl %eax, (%rdx)
2393 movw 4(%rcx), %ax
2394 movw %ax, 4(%rdx)
2395# ifdef USE_AS_STPCPY
2396 lea 5(%rdx), %rax
2397# else
2398 mov %rdi, %rax
2399# endif
2400# ifdef USE_AS_STRNCPY
2401 sub $6, %r8
2402 lea 6(%rdx), %rcx
2403 jnz L(StrncpyFillTailWithZero1)
2404# ifdef USE_AS_STPCPY
2405 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2406 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2407# endif
2408# endif
2409 ret
2410
/* L(Exit7): copy exactly 7 bytes from (%rcx) to (%rdx) using two 4-byte
   moves that overlap by one byte (offsets 0 and 3), then return.  %rax
   returns %rdi, or under USE_AS_STPCPY the address of the last byte
   copied.  USE_AS_STRNCPY: 7 is subtracted from %r8; if budget remains,
   %rcx is pointed just past the copy and L(StrncpyFillTailWithZero1)
   continues (lea leaves the flags from sub intact for the jnz).  When the
   budget is used up exactly, cmpb/sbb add 1 to the stpcpy result unless
   the last byte copied is NUL.  */
2411 .p2align 4
2412L(Exit7):
2413 movl (%rcx), %eax
2414 movl %eax, (%rdx)
2415 movl 3(%rcx), %eax
2416 movl %eax, 3(%rdx)
2417# ifdef USE_AS_STPCPY
2418 lea 6(%rdx), %rax
2419# else
2420 mov %rdi, %rax
2421# endif
2422# ifdef USE_AS_STRNCPY
2423 sub $7, %r8
2424 lea 7(%rdx), %rcx
2425 jnz L(StrncpyFillTailWithZero1)
2426# ifdef USE_AS_STPCPY
2427 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2428 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2429# endif
2430# endif
2431 ret
2432
/* L(Exit9): copy exactly 9 bytes from (%rcx) to (%rdx): an 8-byte move
   plus a 4-byte move at offset 5 whose last byte is byte 8.  %rax returns
   %rdi, or under USE_AS_STPCPY the address of the last byte copied.
   USE_AS_STRNCPY: 9 is subtracted from %r8; if budget remains, %rcx is
   pointed just past the copy and L(StrncpyFillTailWithZero1) continues
   (lea leaves the flags from sub intact for the jnz).  When the budget is
   used up exactly, cmpb/sbb add 1 to the stpcpy result unless the last
   byte copied is NUL.  */
2433 .p2align 4
2434L(Exit9):
2435 mov (%rcx), %rax
2436 mov %rax, (%rdx)
2437 mov 5(%rcx), %eax
2438 mov %eax, 5(%rdx)
2439# ifdef USE_AS_STPCPY
2440 lea 8(%rdx), %rax
2441# else
2442 mov %rdi, %rax
2443# endif
2444# ifdef USE_AS_STRNCPY
2445 sub $9, %r8
2446 lea 9(%rdx), %rcx
2447 jnz L(StrncpyFillTailWithZero1)
2448# ifdef USE_AS_STPCPY
2449 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2450 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2451# endif
2452# endif
2453 ret
2454
/* L(Exit10): copy exactly 10 bytes from (%rcx) to (%rdx): an 8-byte move
   plus a 4-byte move at offset 6 whose last byte is byte 9.  %rax returns
   %rdi, or under USE_AS_STPCPY the address of the last byte copied.
   USE_AS_STRNCPY: 10 is subtracted from %r8; if budget remains, %rcx is
   pointed just past the copy and L(StrncpyFillTailWithZero1) continues
   (lea leaves the flags from sub intact for the jnz).  When the budget is
   used up exactly, cmpb/sbb add 1 to the stpcpy result unless the last
   byte copied is NUL.  */
2455 .p2align 4
2456L(Exit10):
2457 mov (%rcx), %rax
2458 mov %rax, (%rdx)
2459 mov 6(%rcx), %eax
2460 mov %eax, 6(%rdx)
2461# ifdef USE_AS_STPCPY
2462 lea 9(%rdx), %rax
2463# else
2464 mov %rdi, %rax
2465# endif
2466# ifdef USE_AS_STRNCPY
2467 sub $10, %r8
2468 lea 10(%rdx), %rcx
2469 jnz L(StrncpyFillTailWithZero1)
2470# ifdef USE_AS_STPCPY
2471 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2472 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2473# endif
2474# endif
2475 ret
2476
/* L(Exit11): copy exactly 11 bytes from (%rcx) to (%rdx): an 8-byte move
   plus a 4-byte move at offset 7 whose last byte is byte 10.  %rax
   returns %rdi, or under USE_AS_STPCPY the address of the last byte
   copied.  USE_AS_STRNCPY: 11 is subtracted from %r8; if budget remains,
   %rcx is pointed just past the copy and L(StrncpyFillTailWithZero1)
   continues (lea leaves the flags from sub intact for the jnz).  When the
   budget is used up exactly, cmpb/sbb add 1 to the stpcpy result unless
   the last byte copied is NUL.  */
2477 .p2align 4
2478L(Exit11):
2479 mov (%rcx), %rax
2480 mov %rax, (%rdx)
2481 mov 7(%rcx), %eax
2482 mov %eax, 7(%rdx)
2483# ifdef USE_AS_STPCPY
2484 lea 10(%rdx), %rax
2485# else
2486 mov %rdi, %rax
2487# endif
2488# ifdef USE_AS_STRNCPY
2489 sub $11, %r8
2490 lea 11(%rdx), %rcx
2491 jnz L(StrncpyFillTailWithZero1)
2492# ifdef USE_AS_STPCPY
2493 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2494 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2495# endif
2496# endif
2497 ret
2498
/* L(Exit12): copy exactly 12 bytes from (%rcx) to (%rdx): an 8-byte move
   plus a 4-byte move at offset 8.  %rax returns %rdi, or under
   USE_AS_STPCPY the address of the last byte copied.  USE_AS_STRNCPY: 12
   is subtracted from %r8; if budget remains, %rcx is pointed just past
   the copy and L(StrncpyFillTailWithZero1) continues (lea leaves the
   flags from sub intact for the jnz).  When the budget is used up
   exactly, cmpb/sbb add 1 to the stpcpy result unless the last byte
   copied is NUL.  */
2499 .p2align 4
2500L(Exit12):
2501 mov (%rcx), %rax
2502 mov %rax, (%rdx)
2503 mov 8(%rcx), %eax
2504 mov %eax, 8(%rdx)
2505# ifdef USE_AS_STPCPY
2506 lea 11(%rdx), %rax
2507# else
2508 mov %rdi, %rax
2509# endif
2510# ifdef USE_AS_STRNCPY
2511 sub $12, %r8
2512 lea 12(%rdx), %rcx
2513 jnz L(StrncpyFillTailWithZero1)
2514# ifdef USE_AS_STPCPY
2515 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2516 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2517# endif
2518# endif
2519 ret
2520
/* L(Exit13): copy exactly 13 bytes from (%rcx) to (%rdx) using two 8-byte
   moves that overlap (offsets 0 and 5).  %rax returns %rdi, or under
   USE_AS_STPCPY the address of the last byte copied.  USE_AS_STRNCPY: 13
   is subtracted from %r8; if budget remains, %rcx is pointed just past
   the copy and L(StrncpyFillTailWithZero1) continues (lea leaves the
   flags from sub intact for the jnz).  When the budget is used up
   exactly, cmpb/sbb add 1 to the stpcpy result unless the last byte
   copied is NUL.  */
2521 .p2align 4
2522L(Exit13):
2523 mov (%rcx), %rax
2524 mov %rax, (%rdx)
2525 mov 5(%rcx), %rax
2526 mov %rax, 5(%rdx)
2527# ifdef USE_AS_STPCPY
2528 lea 12(%rdx), %rax
2529# else
2530 mov %rdi, %rax
2531# endif
2532# ifdef USE_AS_STRNCPY
2533 sub $13, %r8
2534 lea 13(%rdx), %rcx
2535 jnz L(StrncpyFillTailWithZero1)
2536# ifdef USE_AS_STPCPY
2537 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2538 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2539# endif
2540# endif
2541 ret
2542
/* L(Exit14): copy exactly 14 bytes from (%rcx) to (%rdx) using two 8-byte
   moves that overlap (offsets 0 and 6).  %rax returns %rdi, or under
   USE_AS_STPCPY the address of the last byte copied.  USE_AS_STRNCPY: 14
   is subtracted from %r8; if budget remains, %rcx is pointed just past
   the copy and L(StrncpyFillTailWithZero1) continues (lea leaves the
   flags from sub intact for the jnz).  When the budget is used up
   exactly, cmpb/sbb add 1 to the stpcpy result unless the last byte
   copied is NUL.  */
2543 .p2align 4
2544L(Exit14):
2545 mov (%rcx), %rax
2546 mov %rax, (%rdx)
2547 mov 6(%rcx), %rax
2548 mov %rax, 6(%rdx)
2549# ifdef USE_AS_STPCPY
2550 lea 13(%rdx), %rax
2551# else
2552 mov %rdi, %rax
2553# endif
2554# ifdef USE_AS_STRNCPY
2555 sub $14, %r8
2556 lea 14(%rdx), %rcx
2557 jnz L(StrncpyFillTailWithZero1)
2558# ifdef USE_AS_STPCPY
2559 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2560 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2561# endif
2562# endif
2563 ret
2564
/* L(Exit15): copy exactly 15 bytes from (%rcx) to (%rdx) using two 8-byte
   moves that overlap by one byte (offsets 0 and 7).  %rax returns %rdi,
   or under USE_AS_STPCPY the address of the last byte copied.
   USE_AS_STRNCPY: 15 is subtracted from %r8; if budget remains, %rcx is
   pointed just past the copy and L(StrncpyFillTailWithZero1) continues
   (lea leaves the flags from sub intact for the jnz).  When the budget is
   used up exactly, cmpb/sbb add 1 to the stpcpy result unless the last
   byte copied is NUL.  */
2565 .p2align 4
2566L(Exit15):
2567 mov (%rcx), %rax
2568 mov %rax, (%rdx)
2569 mov 7(%rcx), %rax
2570 mov %rax, 7(%rdx)
2571# ifdef USE_AS_STPCPY
2572 lea 14(%rdx), %rax
2573# else
2574 mov %rdi, %rax
2575# endif
2576# ifdef USE_AS_STRNCPY
2577 sub $15, %r8
2578 lea 15(%rdx), %rcx
2579 jnz L(StrncpyFillTailWithZero1)
2580# ifdef USE_AS_STPCPY
2581 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2582 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2583# endif
2584# endif
2585 ret
2586
2587# ifdef USE_AS_STRNCPY
/* L(Fill0): nothing left to zero-fill; return with %rax untouched.  */
2588 .p2align 4
2589L(Fill0):
2590 ret
2591
/* L(Fill1): store 1 zero byte at (%rcx) and return.  %rdx is the zero
   source; L(StrncpyFillTailWithZero1) clears it before dispatching here.  */
2592 .p2align 4
2593L(Fill1):
2594 movb %dl, (%rcx)
2595 ret
2596
/* L(Fill2): store 2 zero bytes at (%rcx) and return.  %rdx is the zero
   source; L(StrncpyFillTailWithZero1) clears it before dispatching here.  */
2597 .p2align 4
2598L(Fill2):
2599 movw %dx, (%rcx)
2600 ret
2601
/* L(Fill3): store 3 zero bytes at (%rcx) (2 bytes + 1 byte) and return.
   %rdx is the zero source; L(StrncpyFillTailWithZero1) clears it before
   dispatching here.  */
2602 .p2align 4
2603L(Fill3):
2604 movw %dx, (%rcx)
2605 movb %dl, 2(%rcx)
2606 ret
2607
/* L(Fill4): store 4 zero bytes at (%rcx) and return.  %rdx is the zero
   source; L(StrncpyFillTailWithZero1) clears it before dispatching here.  */
2608 .p2align 4
2609L(Fill4):
2610 movl %edx, (%rcx)
2611 ret
2612
/* L(Fill5): store 5 zero bytes at (%rcx) (4 bytes + 1 byte) and return.
   %rdx is the zero source; L(StrncpyFillTailWithZero1) clears it before
   dispatching here.  */
2613 .p2align 4
2614L(Fill5):
2615 movl %edx, (%rcx)
2616 movb %dl, 4(%rcx)
2617 ret
2618
/* L(Fill6): store 6 zero bytes at (%rcx) (4 bytes + 2 bytes) and return.
   %rdx is the zero source; L(StrncpyFillTailWithZero1) clears it before
   dispatching here.  */
2619 .p2align 4
2620L(Fill6):
2621 movl %edx, (%rcx)
2622 movw %dx, 4(%rcx)
2623 ret
2624
/* L(Fill7): store 7 zero bytes at (%rcx) with two 4-byte stores that
   overlap by one byte (offsets 0 and 3), then return.  %rdx is the zero
   source; L(StrncpyFillTailWithZero1) clears it before dispatching here.  */
2625 .p2align 4
2626L(Fill7):
2627 movl %edx, (%rcx)
2628 movl %edx, 3(%rcx)
2629 ret
2630
/* L(Fill8): store 8 zero bytes at (%rcx) and return.  %rdx is the zero
   source; L(StrncpyFillTailWithZero1) clears it before dispatching here.  */
2631 .p2align 4
2632L(Fill8):
2633 mov %rdx, (%rcx)
2634 ret
2635
/* L(Fill9): store 9 zero bytes at (%rcx) (8 bytes + 1 byte) and return.
   %rdx is the zero source; L(StrncpyFillTailWithZero1) clears it before
   dispatching here.  */
2636 .p2align 4
2637L(Fill9):
2638 mov %rdx, (%rcx)
2639 movb %dl, 8(%rcx)
2640 ret
2641
/* L(Fill10): store 10 zero bytes at (%rcx) (8 bytes + 2 bytes) and
   return.  %rdx is the zero source; L(StrncpyFillTailWithZero1) clears it
   before dispatching here.  */
2642 .p2align 4
2643L(Fill10):
2644 mov %rdx, (%rcx)
2645 movw %dx, 8(%rcx)
2646 ret
2647
/* L(Fill11): store 11 zero bytes at (%rcx): 8 bytes plus a 4-byte store
   at offset 7 that overlaps by one byte.  %rdx is the zero source;
   L(StrncpyFillTailWithZero1) clears it before dispatching here.  */
2648 .p2align 4
2649L(Fill11):
2650 mov %rdx, (%rcx)
2651 movl %edx, 7(%rcx)
2652 ret
2653
/* L(Fill12): store 12 zero bytes at (%rcx) (8 bytes + 4 bytes) and
   return.  %rdx is the zero source; L(StrncpyFillTailWithZero1) clears it
   before dispatching here.  */
2654 .p2align 4
2655L(Fill12):
2656 mov %rdx, (%rcx)
2657 movl %edx, 8(%rcx)
2658 ret
2659
/* L(Fill13): store 13 zero bytes at (%rcx) with two overlapping 8-byte
   stores (offsets 0 and 5).  %rdx is the zero source;
   L(StrncpyFillTailWithZero1) clears it before dispatching here.  */
2660 .p2align 4
2661L(Fill13):
2662 mov %rdx, (%rcx)
2663 mov %rdx, 5(%rcx)
2664 ret
2665
/* L(Fill14): store 14 zero bytes at (%rcx) with two overlapping 8-byte
   stores (offsets 0 and 6).  %rdx is the zero source;
   L(StrncpyFillTailWithZero1) clears it before dispatching here.  */
2666 .p2align 4
2667L(Fill14):
2668 mov %rdx, (%rcx)
2669 mov %rdx, 6(%rcx)
2670 ret
2671
/* L(Fill15): store 15 zero bytes at (%rcx) with two overlapping 8-byte
   stores (offsets 0 and 7).  %rdx is the zero source;
   L(StrncpyFillTailWithZero1) clears it before dispatching here.  */
2672 .p2align 4
2673L(Fill15):
2674 mov %rdx, (%rcx)
2675 mov %rdx, 7(%rcx)
2676 ret
2677
/* L(Fill16): store 16 zero bytes at (%rcx) with two 8-byte stores and
   return.  %rdx is the zero source; L(StrncpyFillTailWithZero1) clears it
   before dispatching here.  */
2678 .p2align 4
2679L(Fill16):
2680 mov %rdx, (%rcx)
2681 mov %rdx, 8(%rcx)
2682 ret
2683
/* L(StrncpyFillExit1): undo the last 16-byte subtraction from %r8 so that
   it again holds the number of zero bytes still to be written.
   L(FillFrom1To16Bytes): dispatch on that count to L(Fill<%r8>) with a
   compare tree.  The tree uses signed conditions (jl/jg) and covers
   0..16 only, which is the range the visible callers produce.
   In:  %rcx = next byte to zero, %rdx = 0, %r8 = count.  */
2684 .p2align 4
2685L(StrncpyFillExit1):
2686 lea 16(%r8), %r8
2687L(FillFrom1To16Bytes):
2688 test %r8, %r8
2689 jz L(Fill0)
2690 cmp $16, %r8
2691 je L(Fill16)
2692 cmp $8, %r8
2693 je L(Fill8)
2694 jg L(FillMore8)
2695 cmp $4, %r8
2696 je L(Fill4)
2697 jg L(FillMore4)
2698 cmp $2, %r8
2699 jl L(Fill1)
2700 je L(Fill2)
2701 jg L(Fill3)
2702L(FillMore8): /* but less than 16 */
2703 cmp $12, %r8
2704 je L(Fill12)
2705 jl L(FillLess12)
2706 cmp $14, %r8
2707 jl L(Fill13)
2708 je L(Fill14)
2709 jg L(Fill15)
2710L(FillMore4): /* but less than 8 */
2711 cmp $6, %r8
2712 jl L(Fill5)
2713 je L(Fill6)
2714 jg L(Fill7)
2715L(FillLess12): /* but more than 8 */
2716 cmp $10, %r8
2717 jl L(Fill9)
2718 je L(Fill10)
2719 jmp L(Fill11)
2720
/* L(StrncpyFillTailWithZero1): write %r8 zero bytes starting at (%rcx),
   then return through one of the L(Fill<n>) stubs.  %rax (the function
   result) is not touched.
   - %rdx is cleared and serves as the zero source for scalar stores.
   - 16 bytes or fewer: handled directly by L(StrncpyFillExit1).
   - Otherwise 16 bytes are zeroed with two unaligned 8-byte stores, %rcx
     is rounded down to a 16-byte boundary (the bytes backed over are added
     back to %r8 and simply zeroed again), and the rest is written with
     aligned movdqa stores, 64 bytes per loop pass.  */
2721 .p2align 4
2722L(StrncpyFillTailWithZero1):
2723 xor %rdx, %rdx
2724 sub $16, %r8
2725 jbe L(StrncpyFillExit1)
2726
2727 pxor %xmm0, %xmm0
2728 mov %rdx, (%rcx)
2729 mov %rdx, 8(%rcx)
2730
2731 lea 16(%rcx), %rcx
2732
/* %rdx is borrowed for the misalignment of %rcx and cleared again.  */
2733 mov %rcx, %rdx
2734 and $0xf, %rdx
2735 sub %rdx, %rcx
2736 add %rdx, %r8
2737 xor %rdx, %rdx
2738 sub $64, %r8
2739 jb L(StrncpyFillLess64)
2740
/* Invariant: on entry to each pass %r8 = bytes still to zero minus 64,
   and that value is not negative.  */
2741L(StrncpyFillLoopMovdqa):
2742 movdqa %xmm0, (%rcx)
2743 movdqa %xmm0, 16(%rcx)
2744 movdqa %xmm0, 32(%rcx)
2745 movdqa %xmm0, 48(%rcx)
2746 lea 64(%rcx), %rcx
2747 sub $64, %r8
2748 jae L(StrncpyFillLoopMovdqa)
2749
/* Fewer than 64 bytes remain and %r8 = remaining - 64 (negative).  */
2750L(StrncpyFillLess64):
2751 add $32, %r8 /* %r8 = remaining - 32 */
2752 jl L(StrncpyFillLess32)
2753 movdqa %xmm0, (%rcx)
2754 movdqa %xmm0, 16(%rcx)
2755 lea 32(%rcx), %rcx
2756 sub $16, %r8 /* %r8 = remaining - 16 */
2757 jl L(StrncpyFillExit1)
2758 movdqa %xmm0, (%rcx)
2759 lea 16(%rcx), %rcx
2760 jmp L(FillFrom1To16Bytes)
2761
/* Fewer than 32 bytes remain and %r8 = remaining - 32 (negative).  */
2762L(StrncpyFillLess32):
2763 add $16, %r8 /* %r8 = remaining - 16 */
2764 jl L(StrncpyFillExit1)
2765 movdqa %xmm0, (%rcx)
2766 lea 16(%rcx), %rcx
2767 jmp L(FillFrom1To16Bytes)
2768
/* L(Exit0): taken by the entry code when the strncpy length is 0.
   Nothing is copied; the result is %rdx, into which the entry code has
   already copied the destination pointer %rdi.  */
2769 .p2align 4
2770L(Exit0):
2771 mov %rdx, %rax
2772 ret
2773
/* L(StrncpyExit15Bytes): the entry code jumps here when source bytes
   0..7 are non-NUL and the budget in %r8 is below 16 (so 9..15, since
   budgets of 8 or less take L(StrncpyExit8Bytes)).  For k = 9..14, leave
   through L(Exit<k>) when the budget equals k or when source byte k-1 is
   NUL.  Falling out of the chain means the budget is exactly 15 and bytes
   0..13 are non-NUL: copy 15 bytes with two overlapping 8-byte moves.  No
   zero fill is needed because the budget is fully used.  Under
   USE_AS_STPCPY the result is the address of byte 14, plus 1 unless that
   byte is NUL.  */
2774 .p2align 4
2775L(StrncpyExit15Bytes):
2776 cmp $9, %r8
2777 je L(Exit9)
2778 cmpb $0, 8(%rcx)
2779 jz L(Exit9)
2780 cmp $10, %r8
2781 je L(Exit10)
2782 cmpb $0, 9(%rcx)
2783 jz L(Exit10)
2784 cmp $11, %r8
2785 je L(Exit11)
2786 cmpb $0, 10(%rcx)
2787 jz L(Exit11)
2788 cmp $12, %r8
2789 je L(Exit12)
2790 cmpb $0, 11(%rcx)
2791 jz L(Exit12)
2792 cmp $13, %r8
2793 je L(Exit13)
2794 cmpb $0, 12(%rcx)
2795 jz L(Exit13)
2796 cmp $14, %r8
2797 je L(Exit14)
2798 cmpb $0, 13(%rcx)
2799 jz L(Exit14)
2800 mov (%rcx), %rax
2801 mov %rax, (%rdx)
2802 mov 7(%rcx), %rax
2803 mov %rax, 7(%rdx)
2804# ifdef USE_AS_STPCPY
2805 lea 14(%rdx), %rax
2806 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2807 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2808# else
2809 mov %rdi, %rax
2810# endif
2811 ret
2812
/* L(StrncpyExit8Bytes): the entry code jumps here when the budget in %r8
   is 1..8 (0 is handled by L(Exit0) first).  For k = 1..7, leave through
   L(Exit<k>) when the budget equals k or when source byte k-1 is NUL.
   Falling out of the chain means the budget is exactly 8 and bytes 0..6
   are non-NUL: copy 8 bytes with one move.  No zero fill is needed
   because the budget is fully used.  Under USE_AS_STPCPY the result is
   the address of byte 7, plus 1 unless that byte is NUL.  */
2813 .p2align 4
2814L(StrncpyExit8Bytes):
2815 cmp $1, %r8
2816 je L(Exit1)
2817 cmpb $0, (%rcx)
2818 jz L(Exit1)
2819 cmp $2, %r8
2820 je L(Exit2)
2821 cmpb $0, 1(%rcx)
2822 jz L(Exit2)
2823 cmp $3, %r8
2824 je L(Exit3)
2825 cmpb $0, 2(%rcx)
2826 jz L(Exit3)
2827 cmp $4, %r8
2828 je L(Exit4)
2829 cmpb $0, 3(%rcx)
2830 jz L(Exit4)
2831 cmp $5, %r8
2832 je L(Exit5)
2833 cmpb $0, 4(%rcx)
2834 jz L(Exit5)
2835 cmp $6, %r8
2836 je L(Exit6)
2837 cmpb $0, 5(%rcx)
2838 jz L(Exit6)
2839 cmp $7, %r8
2840 je L(Exit7)
2841 cmpb $0, 6(%rcx)
2842 jz L(Exit7)
2843 mov (%rcx), %rax
2844 mov %rax, (%rdx)
2845# ifdef USE_AS_STPCPY
2846 lea 7(%rdx), %rax
2847 cmpb $1, (%rax) /* CF = 1 only when the byte at (%rax) is 0 */
2848 sbb $-1, %rax /* %rax = %rax + 1 - CF */
2849# else
2850 mov %rdi, %rax
2851# endif
2852 ret
2853
2854# endif
2855# endif
2856
2857# ifdef USE_AS_STRNCPY
/* L(StrncpyLeaveCase2OrCase3): exit from a 64-byte loop when the byte
   budget runs out within the current 64 bytes.  The code that jumps here
   is outside this section; presumably it is the aligned 64-byte loop, with
   the four blocks in %xmm4..%xmm7, %rdx already advanced by 64, %r8 =
   budget - 64, and %rsi a running block offset -- TODO confirm.
   %rax != 0 means one of the blocks also holds a NUL (Case2).

   L(Aligned64LeaveCase3): no NUL.  Add the 64 back, then for each block
   take 16 off the budget; when that reaches zero or below, leave the
   block to L(CopyFrom1To16BytesCase3), otherwise store it whole and step
   %rsi.  The final lea pre-subtracts 16 for the last block because
   Case3 adds 16 back on entry.  */
2858 .p2align 4
2859L(StrncpyLeaveCase2OrCase3):
2860 test %rax, %rax
2861 jnz L(Aligned64LeaveCase2)
2862
2863L(Aligned64LeaveCase3):
2864 lea 64(%r8), %r8
2865 sub $16, %r8
2866 jbe L(CopyFrom1To16BytesCase3)
2867 movaps %xmm4, -64(%rdx)
2868 lea 16(%rsi), %rsi
2869 sub $16, %r8
2870 jbe L(CopyFrom1To16BytesCase3)
2871 movaps %xmm5, -48(%rdx)
2872 lea 16(%rsi), %rsi
2873 sub $16, %r8
2874 jbe L(CopyFrom1To16BytesCase3)
2875 movaps %xmm6, -32(%rdx)
2876 lea 16(%rsi), %rsi
2877 lea -16(%r8), %r8
2878 jmp L(CopyFrom1To16BytesCase3)
2879
/* L(Aligned64LeaveCase2): the budget runs out within the current 64
   bytes and at least one of the four blocks (%xmm4..%xmm7) holds a NUL.
   Each block is tested in turn against %xmm0, which is expected to be
   all-zero on entry and stays all-zero after a compare with no match:
     - budget ends in this block: L(CopyFrom1To16BytesCase2OrCase3)
     - NUL in this block and budget still open: L(CopyFrom1To16Bytes)
     - otherwise the previous block is stored whole and %rsi steps by 16.
   add $48 is "+64 -16" on a budget that presumably had 64 subtracted by
   the caller (outside this section -- TODO confirm); that first check is
   signed (jle), the later ones unsigned (jbe).  If the first three blocks
   resolve nothing, the last one must hold the NUL, so it goes to
   L(CopyFrom1To16BytesCase2).  */
2880L(Aligned64LeaveCase2):
2881 pcmpeqb %xmm4, %xmm0
2882 pmovmskb %xmm0, %rax
2883 add $48, %r8
2884 jle L(CopyFrom1To16BytesCase2OrCase3)
2885 test %rax, %rax
2886 jnz L(CopyFrom1To16Bytes)
2887
2888 pcmpeqb %xmm5, %xmm0
2889 pmovmskb %xmm0, %rax
2890 movaps %xmm4, -64(%rdx)
2891 lea 16(%rsi), %rsi
2892 sub $16, %r8
2893 jbe L(CopyFrom1To16BytesCase2OrCase3)
2894 test %rax, %rax
2895 jnz L(CopyFrom1To16Bytes)
2896
2897 pcmpeqb %xmm6, %xmm0
2898 pmovmskb %xmm0, %rax
2899 movaps %xmm5, -48(%rdx)
2900 lea 16(%rsi), %rsi
2901 sub $16, %r8
2902 jbe L(CopyFrom1To16BytesCase2OrCase3)
2903 test %rax, %rax
2904 jnz L(CopyFrom1To16Bytes)
2905
2906 pcmpeqb %xmm7, %xmm0
2907 pmovmskb %xmm0, %rax
2908 movaps %xmm6, -32(%rdx)
2909 lea 16(%rsi), %rsi
2910 lea -16(%r8), %r8 /* Case2 adds 16 back on entry */
2911 jmp L(CopyFrom1To16BytesCase2)
2912/*--------------------------------------------------*/
/* L(StrncpyExit1Case2OrCase3): budget-exhausted exit for the shift-by-1
   copy path (its main code is outside this section).  Copies 16 bytes
   from 1 byte before the cursors with an unaligned move, which covers the
   15 bytes that precede the block just tested, sets %rsi = 15 as the
   cursor adjustment, and picks Case2 (NUL mask in %rax non-zero) or
   Case3.  %xmm0 is overwritten; the zero vector is not needed after
   this point on these paths.  */
2913 .p2align 4
2914L(StrncpyExit1Case2OrCase3):
2915 movdqu -1(%rcx), %xmm0
2916 movdqu %xmm0, -1(%rdx)
2917 mov $15, %rsi
2918 test %rax, %rax
2919 jnz L(CopyFrom1To16BytesCase2)
2920 jmp L(CopyFrom1To16BytesCase3)
2921
/* L(StrncpyExit2Case2OrCase3): budget-exhausted exit for the shift-by-2
   copy path (its main code is outside this section).  Copies 16 bytes
   from 2 bytes before the cursors with an unaligned move, which covers
   the 14 bytes that precede the block just tested, sets %rsi = 14 as the
   cursor adjustment, and picks Case2 (NUL mask in %rax non-zero) or
   Case3.  */
2922 .p2align 4
2923L(StrncpyExit2Case2OrCase3):
2924 movdqu -2(%rcx), %xmm0
2925 movdqu %xmm0, -2(%rdx)
2926 mov $14, %rsi
2927 test %rax, %rax
2928 jnz L(CopyFrom1To16BytesCase2)
2929 jmp L(CopyFrom1To16BytesCase3)
2930
/* L(StrncpyExit3Case2OrCase3): budget-exhausted exit for the shift-by-3
   copy path (its main code is outside this section).  Copies 16 bytes
   from 3 bytes before the cursors with an unaligned move, which covers
   the 13 bytes that precede the block just tested, sets %rsi = 13 as the
   cursor adjustment, and picks Case2 (NUL mask in %rax non-zero) or
   Case3.  */
2931 .p2align 4
2932L(StrncpyExit3Case2OrCase3):
2933 movdqu -3(%rcx), %xmm0
2934 movdqu %xmm0, -3(%rdx)
2935 mov $13, %rsi
2936 test %rax, %rax
2937 jnz L(CopyFrom1To16BytesCase2)
2938 jmp L(CopyFrom1To16BytesCase3)
2939
/* L(StrncpyExit4Case2OrCase3): budget-exhausted exit for the shift-by-4
   copy path (its main code is outside this section).  Copies 16 bytes
   from 4 bytes before the cursors with an unaligned move, which covers
   the 12 bytes that precede the block just tested, sets %rsi = 12 as the
   cursor adjustment, and picks Case2 (NUL mask in %rax non-zero) or
   Case3.  */
2940 .p2align 4
2941L(StrncpyExit4Case2OrCase3):
2942 movdqu -4(%rcx), %xmm0
2943 movdqu %xmm0, -4(%rdx)
2944 mov $12, %rsi
2945 test %rax, %rax
2946 jnz L(CopyFrom1To16BytesCase2)
2947 jmp L(CopyFrom1To16BytesCase3)
2948
/* L(StrncpyExit5Case2OrCase3): budget-exhausted exit for the shift-by-5
   copy path (its main code is outside this section).  Copies 16 bytes
   from 5 bytes before the cursors with an unaligned move, which covers
   the 11 bytes that precede the block just tested, sets %rsi = 11 as the
   cursor adjustment, and picks Case2 (NUL mask in %rax non-zero) or
   Case3.  */
2949 .p2align 4
2950L(StrncpyExit5Case2OrCase3):
2951 movdqu -5(%rcx), %xmm0
2952 movdqu %xmm0, -5(%rdx)
2953 mov $11, %rsi
2954 test %rax, %rax
2955 jnz L(CopyFrom1To16BytesCase2)
2956 jmp L(CopyFrom1To16BytesCase3)
2957
/* L(StrncpyExit6Case2OrCase3): budget-exhausted exit for the shift-by-6
   copy path (its main code is outside this section).  Copies the 10 bytes
   at the cursors (8 bytes at offset 0 plus 4 bytes at offset 6), sets
   %rsi = 10 as the cursor adjustment, and picks Case2 (NUL mask in %rax
   non-zero) or Case3.  %rsi doubles as scratch for the 8-byte move; the
   mov that loads 10 sits after the test and does not change the flags.  */
2958 .p2align 4
2959L(StrncpyExit6Case2OrCase3):
2960 mov (%rcx), %rsi
2961 mov 6(%rcx), %r9d
2962 mov %r9d, 6(%rdx)
2963 mov %rsi, (%rdx)
2964 test %rax, %rax
2965 mov $10, %rsi
2966 jnz L(CopyFrom1To16BytesCase2)
2967 jmp L(CopyFrom1To16BytesCase3)
2968
/* L(StrncpyExit7Case2OrCase3): budget-exhausted exit for the shift-by-7
   copy path (its main code is outside this section).  Copies the 9 bytes
   at the cursors (8 bytes at offset 0 plus 4 bytes at offset 5), sets
   %rsi = 9 as the cursor adjustment, and picks Case2 (NUL mask in %rax
   non-zero) or Case3.  %rsi doubles as scratch for the 8-byte move; the
   mov that loads 9 sits after the test and does not change the flags.  */
2969 .p2align 4
2970L(StrncpyExit7Case2OrCase3):
2971 mov (%rcx), %rsi
2972 mov 5(%rcx), %r9d
2973 mov %r9d, 5(%rdx)
2974 mov %rsi, (%rdx)
2975 test %rax, %rax
2976 mov $9, %rsi
2977 jnz L(CopyFrom1To16BytesCase2)
2978 jmp L(CopyFrom1To16BytesCase3)
2979
/* L(StrncpyExit8Case2OrCase3): budget-exhausted exit for the shift-by-8
   copy path (its main code is outside this section).  Copies the 8 bytes
   at the cursors, sets %rsi = 8 as the cursor adjustment, and picks Case2
   (NUL mask in %rax non-zero) or Case3.  */
2980 .p2align 4
2981L(StrncpyExit8Case2OrCase3):
2982 mov (%rcx), %r9
2983 mov $8, %rsi
2984 mov %r9, (%rdx)
2985 test %rax, %rax
2986 jnz L(CopyFrom1To16BytesCase2)
2987 jmp L(CopyFrom1To16BytesCase3)
2988
/* L(StrncpyExit9Case2OrCase3): budget-exhausted exit for the shift-by-9
   copy path (its main code is outside this section).  Copies 8 bytes
   starting 1 byte before the cursors, which covers the 7 bytes that
   precede the block just tested, sets %rsi = 7 as the cursor adjustment,
   and picks Case2 (NUL mask in %rax non-zero) or Case3.  */
2989 .p2align 4
2990L(StrncpyExit9Case2OrCase3):
2991 mov -1(%rcx), %r9
2992 mov $7, %rsi
2993 mov %r9, -1(%rdx)
2994 test %rax, %rax
2995 jnz L(CopyFrom1To16BytesCase2)
2996 jmp L(CopyFrom1To16BytesCase3)
2997
/* L(StrncpyExit10Case2OrCase3): taken from L(Shl10) when 16 or fewer
   budget bytes were left at a block test.  Copies 8 bytes starting 2
   bytes before the cursors, which covers the 6 bytes that precede the
   block just tested, sets %rsi = 6 as the cursor adjustment, and picks
   Case2 (NUL mask in %rax non-zero) or Case3.  */
2998 .p2align 4
2999L(StrncpyExit10Case2OrCase3):
3000 mov -2(%rcx), %r9
3001 mov $6, %rsi
3002 mov %r9, -2(%rdx)
3003 test %rax, %rax
3004 jnz L(CopyFrom1To16BytesCase2)
3005 jmp L(CopyFrom1To16BytesCase3)
3006
3007 .p2align 4
/* StrncpyExit11Case2OrCase3: copy the 8 bytes that start 3 bytes before
   %rcx to the same offset from %rdx, so the copied window ends 5 bytes
   past each pointer; set %rsi = 5 and dispatch on %rax.
   NOTE(review): %rax is presumably the null-byte mask from the caller and
   %rsi the offset the CopyFrom1To16Bytes* targets expect -- confirm.  */
3008L(StrncpyExit11Case2OrCase3):
3009 mov -3(%rcx), %r9	/* bytes -3..4 */
3010 mov $5, %rsi
3011 mov %r9, -3(%rdx)
3012 test %rax, %rax
3013 jnz L(CopyFrom1To16BytesCase2)	/* %rax != 0 */
3014 jmp L(CopyFrom1To16BytesCase3)	/* %rax == 0 */
3015
3016 .p2align 4
/* StrncpyExit12Case2OrCase3: copy the 4 bytes at %rcx to %rdx, set
   %rsi = 4 and dispatch on %rax.
   NOTE(review): %rax is presumably the null-byte mask from the caller and
   %rsi the offset the CopyFrom1To16Bytes* targets expect -- confirm.  */
3017L(StrncpyExit12Case2OrCase3):
3018 mov (%rcx), %r9d	/* bytes 0..3 */
3019 mov $4, %rsi
3020 mov %r9d, (%rdx)
3021 test %rax, %rax
3022 jnz L(CopyFrom1To16BytesCase2)	/* %rax != 0 */
3023 jmp L(CopyFrom1To16BytesCase3)	/* %rax == 0 */
3024
3025 .p2align 4
/* StrncpyExit13Case2OrCase3: copy the 4 bytes that start 1 byte before
   %rcx to the same offset from %rdx, so the copied window ends 3 bytes
   past each pointer; set %rsi = 3 and dispatch on %rax.
   NOTE(review): %rax is presumably the null-byte mask from the caller and
   %rsi the offset the CopyFrom1To16Bytes* targets expect -- confirm.  */
3026L(StrncpyExit13Case2OrCase3):
3027 mov -1(%rcx), %r9d	/* bytes -1..2 */
3028 mov $3, %rsi
3029 mov %r9d, -1(%rdx)
3030 test %rax, %rax
3031 jnz L(CopyFrom1To16BytesCase2)	/* %rax != 0 */
3032 jmp L(CopyFrom1To16BytesCase3)	/* %rax == 0 */
3033
3034 .p2align 4
/* StrncpyExit14Case2OrCase3: copy the 4 bytes that start 2 bytes before
   %rcx to the same offset from %rdx, so the copied window ends 2 bytes
   past each pointer; set %rsi = 2 and dispatch on %rax.
   NOTE(review): %rax is presumably the null-byte mask from the caller and
   %rsi the offset the CopyFrom1To16Bytes* targets expect -- confirm.  */
3035L(StrncpyExit14Case2OrCase3):
3036 mov -2(%rcx), %r9d	/* bytes -2..1 */
3037 mov $2, %rsi
3038 mov %r9d, -2(%rdx)
3039 test %rax, %rax
3040 jnz L(CopyFrom1To16BytesCase2)	/* %rax != 0 */
3041 jmp L(CopyFrom1To16BytesCase3)	/* %rax == 0 */
3042
3043 .p2align 4
/* StrncpyExit15Case2OrCase3: copy the 4 bytes that start 3 bytes before
   %rcx to the same offset from %rdx, so the copied window ends 1 byte
   past each pointer; set %rsi = 1 and dispatch on %rax.
   NOTE(review): %rax is presumably the null-byte mask from the caller and
   %rsi the offset the CopyFrom1To16Bytes* targets expect -- confirm.  */
3044L(StrncpyExit15Case2OrCase3):
3045 mov -3(%rcx), %r9d	/* bytes -3..0 */
3046 mov $1, %rsi
3047 mov %r9d, -3(%rdx)
3048 test %rax, %rax
3049 jnz L(CopyFrom1To16BytesCase2)	/* %rax != 0 */
3050 jmp L(CopyFrom1To16BytesCase3)	/* %rax == 0 */
3051
3052 .p2align 4
/* StrncpyLeave1: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit1.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $1; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3053L(StrncpyLeave1):
3054 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3055 add $48, %r8
3056 jle L(StrncpyExit1)	/* signed test: taken when %r8 + 48 <= 0 */
3057 palignr $1, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 1 byte */
3058 movaps %xmm2, (%rdx)
3059 movaps 31(%rcx), %xmm2	/* aligned load: %rcx + 31 must be 16-byte aligned */
3060 lea 16(%rsi), %rsi
3061 sub $16, %r8
3062 jbe L(StrncpyExit1)	/* unsigned test: taken when %r8 was <= 16 */
3063 palignr $1, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3064 movaps %xmm2, 16(%rdx)
3065 lea 16(%rsi), %rsi
3066 sub $16, %r8
3067 jbe L(StrncpyExit1)
3068 movaps %xmm4, 32(%rdx)
3069 lea 16(%rsi), %rsi
3070 sub $16, %r8
3071 jbe L(StrncpyExit1)
3072 movaps %xmm5, 48(%rdx)
3073 lea 16(%rsi), %rsi
3074 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3075
/* StrncpyExit1: advance %rdx and %rcx by %rsi + 15, copy the 15 bytes
   that end at the new %rcx to just below the new %rdx (two 8-byte moves
   overlapping by one byte), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3076L(StrncpyExit1):
3077 lea 15(%rdx, %rsi), %rdx
3078 lea 15(%rcx, %rsi), %rcx
3079 mov -15(%rcx), %rsi	/* %rsi and %rax are scratch from here on */
3080 mov -8(%rcx), %rax
3081 mov %rsi, -15(%rdx)
3082 mov %rax, -8(%rdx)
3083 xor %rsi, %rsi	/* %rsi = 0 */
3084 jmp L(CopyFrom1To16BytesCase3)
3085
3086 .p2align 4
/* StrncpyLeave2: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit2.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $2; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3087L(StrncpyLeave2):
3088 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3089 add $48, %r8
3090 jle L(StrncpyExit2)	/* signed test: taken when %r8 + 48 <= 0 */
3091 palignr $2, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 2 bytes */
3092 movaps %xmm2, (%rdx)
3093 movaps 30(%rcx), %xmm2	/* aligned load: %rcx + 30 must be 16-byte aligned */
3094 lea 16(%rsi), %rsi
3095 sub $16, %r8
3096 jbe L(StrncpyExit2)	/* unsigned test: taken when %r8 was <= 16 */
3097 palignr $2, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3098 movaps %xmm2, 16(%rdx)
3099 lea 16(%rsi), %rsi
3100 sub $16, %r8
3101 jbe L(StrncpyExit2)
3102 movaps %xmm4, 32(%rdx)
3103 lea 16(%rsi), %rsi
3104 sub $16, %r8
3105 jbe L(StrncpyExit2)
3106 movaps %xmm5, 48(%rdx)
3107 lea 16(%rsi), %rsi
3108 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3109
/* StrncpyExit2: advance %rdx and %rcx by %rsi + 14, copy the 14 bytes
   that end at the new %rcx to just below the new %rdx (two 8-byte moves
   overlapping by two bytes), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3110L(StrncpyExit2):
3111 lea 14(%rdx, %rsi), %rdx
3112 lea 14(%rcx, %rsi), %rcx
3113 mov -14(%rcx), %rsi	/* %rsi and %rax are scratch from here on */
3114 mov -8(%rcx), %rax
3115 mov %rsi, -14(%rdx)
3116 mov %rax, -8(%rdx)
3117 xor %rsi, %rsi	/* %rsi = 0 */
3118 jmp L(CopyFrom1To16BytesCase3)
3119
3120 .p2align 4
/* StrncpyLeave3: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit3.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $3; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3121L(StrncpyLeave3):
3122 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3123 add $48, %r8
3124 jle L(StrncpyExit3)	/* signed test: taken when %r8 + 48 <= 0 */
3125 palignr $3, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 3 bytes */
3126 movaps %xmm2, (%rdx)
3127 movaps 29(%rcx), %xmm2	/* aligned load: %rcx + 29 must be 16-byte aligned */
3128 lea 16(%rsi), %rsi
3129 sub $16, %r8
3130 jbe L(StrncpyExit3)	/* unsigned test: taken when %r8 was <= 16 */
3131 palignr $3, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3132 movaps %xmm2, 16(%rdx)
3133 lea 16(%rsi), %rsi
3134 sub $16, %r8
3135 jbe L(StrncpyExit3)
3136 movaps %xmm4, 32(%rdx)
3137 lea 16(%rsi), %rsi
3138 sub $16, %r8
3139 jbe L(StrncpyExit3)
3140 movaps %xmm5, 48(%rdx)
3141 lea 16(%rsi), %rsi
3142 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3143
/* StrncpyExit3: advance %rdx and %rcx by %rsi + 13, copy the 13 bytes
   that end at the new %rcx to just below the new %rdx (two 8-byte moves
   overlapping by three bytes), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3144L(StrncpyExit3):
3145 lea 13(%rdx, %rsi), %rdx
3146 lea 13(%rcx, %rsi), %rcx
3147 mov -13(%rcx), %rsi	/* %rsi and %rax are scratch from here on */
3148 mov -8(%rcx), %rax
3149 mov %rsi, -13(%rdx)
3150 mov %rax, -8(%rdx)
3151 xor %rsi, %rsi	/* %rsi = 0 */
3152 jmp L(CopyFrom1To16BytesCase3)
3153
3154 .p2align 4
/* StrncpyLeave4: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit4.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $4; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3155L(StrncpyLeave4):
3156 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3157 add $48, %r8
3158 jle L(StrncpyExit4)	/* signed test: taken when %r8 + 48 <= 0 */
3159 palignr $4, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 4 bytes */
3160 movaps %xmm2, (%rdx)
3161 movaps 28(%rcx), %xmm2	/* aligned load: %rcx + 28 must be 16-byte aligned */
3162 lea 16(%rsi), %rsi
3163 sub $16, %r8
3164 jbe L(StrncpyExit4)	/* unsigned test: taken when %r8 was <= 16 */
3165 palignr $4, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3166 movaps %xmm2, 16(%rdx)
3167 lea 16(%rsi), %rsi
3168 sub $16, %r8
3169 jbe L(StrncpyExit4)
3170 movaps %xmm4, 32(%rdx)
3171 lea 16(%rsi), %rsi
3172 sub $16, %r8
3173 jbe L(StrncpyExit4)
3174 movaps %xmm5, 48(%rdx)
3175 lea 16(%rsi), %rsi
3176 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3177
/* StrncpyExit4: advance %rdx and %rcx by %rsi + 12, copy the 12 bytes
   that end at the new %rcx to just below the new %rdx (an 8-byte move
   followed by a 4-byte move), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3178L(StrncpyExit4):
3179 lea 12(%rdx, %rsi), %rdx
3180 lea 12(%rcx, %rsi), %rcx
3181 mov -12(%rcx), %rsi	/* %rsi and %eax are scratch from here on */
3182 mov -4(%rcx), %eax
3183 mov %rsi, -12(%rdx)
3184 mov %eax, -4(%rdx)
3185 xor %rsi, %rsi	/* %rsi = 0 */
3186 jmp L(CopyFrom1To16BytesCase3)
3187
3188 .p2align 4
/* StrncpyLeave5: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit5.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $5; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3189L(StrncpyLeave5):
3190 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3191 add $48, %r8
3192 jle L(StrncpyExit5)	/* signed test: taken when %r8 + 48 <= 0 */
3193 palignr $5, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 5 bytes */
3194 movaps %xmm2, (%rdx)
3195 movaps 27(%rcx), %xmm2	/* aligned load: %rcx + 27 must be 16-byte aligned */
3196 lea 16(%rsi), %rsi
3197 sub $16, %r8
3198 jbe L(StrncpyExit5)	/* unsigned test: taken when %r8 was <= 16 */
3199 palignr $5, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3200 movaps %xmm2, 16(%rdx)
3201 lea 16(%rsi), %rsi
3202 sub $16, %r8
3203 jbe L(StrncpyExit5)
3204 movaps %xmm4, 32(%rdx)
3205 lea 16(%rsi), %rsi
3206 sub $16, %r8
3207 jbe L(StrncpyExit5)
3208 movaps %xmm5, 48(%rdx)
3209 lea 16(%rsi), %rsi
3210 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3211
/* StrncpyExit5: advance %rdx and %rcx by %rsi + 11, copy the 11 bytes
   that end at the new %rcx to just below the new %rdx (an 8-byte move
   and a 4-byte move overlapping by one byte), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3212L(StrncpyExit5):
3213 lea 11(%rdx, %rsi), %rdx
3214 lea 11(%rcx, %rsi), %rcx
3215 mov -11(%rcx), %rsi	/* %rsi and %eax are scratch from here on */
3216 mov -4(%rcx), %eax
3217 mov %rsi, -11(%rdx)
3218 mov %eax, -4(%rdx)
3219 xor %rsi, %rsi	/* %rsi = 0 */
3220 jmp L(CopyFrom1To16BytesCase3)
3221
3222 .p2align 4
/* StrncpyLeave6: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit6.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $6; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3223L(StrncpyLeave6):
3224 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3225 add $48, %r8
3226 jle L(StrncpyExit6)	/* signed test: taken when %r8 + 48 <= 0 */
3227 palignr $6, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 6 bytes */
3228 movaps %xmm2, (%rdx)
3229 movaps 26(%rcx), %xmm2	/* aligned load: %rcx + 26 must be 16-byte aligned */
3230 lea 16(%rsi), %rsi
3231 sub $16, %r8
3232 jbe L(StrncpyExit6)	/* unsigned test: taken when %r8 was <= 16 */
3233 palignr $6, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3234 movaps %xmm2, 16(%rdx)
3235 lea 16(%rsi), %rsi
3236 sub $16, %r8
3237 jbe L(StrncpyExit6)
3238 movaps %xmm4, 32(%rdx)
3239 lea 16(%rsi), %rsi
3240 sub $16, %r8
3241 jbe L(StrncpyExit6)
3242 movaps %xmm5, 48(%rdx)
3243 lea 16(%rsi), %rsi
3244 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3245
/* StrncpyExit6: advance %rdx and %rcx by %rsi + 10, copy the 10 bytes
   that end at the new %rcx to just below the new %rdx (an 8-byte move
   followed by a 2-byte move), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3246L(StrncpyExit6):
3247 lea 10(%rdx, %rsi), %rdx
3248 lea 10(%rcx, %rsi), %rcx
3249 mov -10(%rcx), %rsi	/* %rsi and %ax are scratch from here on */
3250 movw -2(%rcx), %ax
3251 mov %rsi, -10(%rdx)
3252 movw %ax, -2(%rdx)
3253 xor %rsi, %rsi	/* %rsi = 0 */
3254 jmp L(CopyFrom1To16BytesCase3)
3255
3256 .p2align 4
/* StrncpyLeave7: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit7.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $7; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3257L(StrncpyLeave7):
3258 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3259 add $48, %r8
3260 jle L(StrncpyExit7)	/* signed test: taken when %r8 + 48 <= 0 */
3261 palignr $7, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 7 bytes */
3262 movaps %xmm2, (%rdx)
3263 movaps 25(%rcx), %xmm2	/* aligned load: %rcx + 25 must be 16-byte aligned */
3264 lea 16(%rsi), %rsi
3265 sub $16, %r8
3266 jbe L(StrncpyExit7)	/* unsigned test: taken when %r8 was <= 16 */
3267 palignr $7, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3268 movaps %xmm2, 16(%rdx)
3269 lea 16(%rsi), %rsi
3270 sub $16, %r8
3271 jbe L(StrncpyExit7)
3272 movaps %xmm4, 32(%rdx)
3273 lea 16(%rsi), %rsi
3274 sub $16, %r8
3275 jbe L(StrncpyExit7)
3276 movaps %xmm5, 48(%rdx)
3277 lea 16(%rsi), %rsi
3278 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3279
/* StrncpyExit7: advance %rdx and %rcx by %rsi + 9, copy the 9 bytes
   that end at the new %rcx to just below the new %rdx (an 8-byte move
   followed by a single byte moved through %ah), clear %rsi and continue
   at CopyFrom1To16BytesCase3.  */
3280L(StrncpyExit7):
3281 lea 9(%rdx, %rsi), %rdx
3282 lea 9(%rcx, %rsi), %rcx
3283 mov -9(%rcx), %rsi	/* %rsi and %ah are scratch from here on */
3284 movb -1(%rcx), %ah
3285 mov %rsi, -9(%rdx)
3286 movb %ah, -1(%rdx)
3287 xor %rsi, %rsi	/* %rsi = 0 */
3288 jmp L(CopyFrom1To16BytesCase3)
3289
3290 .p2align 4
/* StrncpyLeave8: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit8.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $8; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3291L(StrncpyLeave8):
3292 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3293 add $48, %r8
3294 jle L(StrncpyExit8)	/* signed test: taken when %r8 + 48 <= 0 */
3295 palignr $8, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 8 bytes */
3296 movaps %xmm2, (%rdx)
3297 movaps 24(%rcx), %xmm2	/* aligned load: %rcx + 24 must be 16-byte aligned */
3298 lea 16(%rsi), %rsi
3299 sub $16, %r8
3300 jbe L(StrncpyExit8)	/* unsigned test: taken when %r8 was <= 16 */
3301 palignr $8, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3302 movaps %xmm2, 16(%rdx)
3303 lea 16(%rsi), %rsi
3304 sub $16, %r8
3305 jbe L(StrncpyExit8)
3306 movaps %xmm4, 32(%rdx)
3307 lea 16(%rsi), %rsi
3308 sub $16, %r8
3309 jbe L(StrncpyExit8)
3310 movaps %xmm5, 48(%rdx)
3311 lea 16(%rsi), %rsi
3312 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3313
/* StrncpyExit8: advance %rdx and %rcx by %rsi + 8, copy the 8 bytes
   that end at the new %rcx to just below the new %rdx, clear %rsi and
   continue at CopyFrom1To16BytesCase3.  */
3314L(StrncpyExit8):
3315 lea 8(%rdx, %rsi), %rdx
3316 lea 8(%rcx, %rsi), %rcx
3317 mov -8(%rcx), %rax	/* %rax is scratch here */
3318 xor %rsi, %rsi	/* %rsi = 0 */
3319 mov %rax, -8(%rdx)
3320 jmp L(CopyFrom1To16BytesCase3)
3321
3322 .p2align 4
/* StrncpyLeave9: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit9.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $9; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3323L(StrncpyLeave9):
3324 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3325 add $48, %r8
3326 jle L(StrncpyExit9)	/* signed test: taken when %r8 + 48 <= 0 */
3327 palignr $9, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 9 bytes */
3328 movaps %xmm2, (%rdx)
3329 movaps 23(%rcx), %xmm2	/* aligned load: %rcx + 23 must be 16-byte aligned */
3330 lea 16(%rsi), %rsi
3331 sub $16, %r8
3332 jbe L(StrncpyExit9)	/* unsigned test: taken when %r8 was <= 16 */
3333 palignr $9, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3334 movaps %xmm2, 16(%rdx)
3335 lea 16(%rsi), %rsi
3336 sub $16, %r8
3337 jbe L(StrncpyExit9)
3338 movaps %xmm4, 32(%rdx)
3339 lea 16(%rsi), %rsi
3340 sub $16, %r8
3341 jbe L(StrncpyExit9)
3342 movaps %xmm5, 48(%rdx)
3343 lea 16(%rsi), %rsi
3344 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3345
/* StrncpyExit9: advance %rdx and %rcx by %rsi + 7, copy the 8 bytes
   that end at the new %rcx to just below the new %rdx (one byte more than
   the 7-byte advance), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3346L(StrncpyExit9):
3347 lea 7(%rdx, %rsi), %rdx
3348 lea 7(%rcx, %rsi), %rcx
3349 mov -8(%rcx), %rax	/* %rax is scratch here */
3350 xor %rsi, %rsi	/* %rsi = 0 */
3351 mov %rax, -8(%rdx)
3352 jmp L(CopyFrom1To16BytesCase3)
3353
3354 .p2align 4
/* StrncpyLeave10: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit10.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $10; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3355L(StrncpyLeave10):
3356 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3357 add $48, %r8
3358 jle L(StrncpyExit10)	/* signed test: taken when %r8 + 48 <= 0 */
3359 palignr $10, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 10 bytes */
3360 movaps %xmm2, (%rdx)
3361 movaps 22(%rcx), %xmm2	/* aligned load: %rcx + 22 must be 16-byte aligned */
3362 lea 16(%rsi), %rsi
3363 sub $16, %r8
3364 jbe L(StrncpyExit10)	/* unsigned test: taken when %r8 was <= 16 */
3365 palignr $10, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3366 movaps %xmm2, 16(%rdx)
3367 lea 16(%rsi), %rsi
3368 sub $16, %r8
3369 jbe L(StrncpyExit10)
3370 movaps %xmm4, 32(%rdx)
3371 lea 16(%rsi), %rsi
3372 sub $16, %r8
3373 jbe L(StrncpyExit10)
3374 movaps %xmm5, 48(%rdx)
3375 lea 16(%rsi), %rsi
3376 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3377
/* StrncpyExit10: advance %rdx and %rcx by %rsi + 6, copy the 8 bytes
   that end at the new %rcx to just below the new %rdx (two bytes more
   than the 6-byte advance), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3378L(StrncpyExit10):
3379 lea 6(%rdx, %rsi), %rdx
3380 lea 6(%rcx, %rsi), %rcx
3381 mov -8(%rcx), %rax	/* %rax is scratch here */
3382 xor %rsi, %rsi	/* %rsi = 0 */
3383 mov %rax, -8(%rdx)
3384 jmp L(CopyFrom1To16BytesCase3)
3385
3386 .p2align 4
/* StrncpyLeave11: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit11.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $11; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3387L(StrncpyLeave11):
3388 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3389 add $48, %r8
3390 jle L(StrncpyExit11)	/* signed test: taken when %r8 + 48 <= 0 */
3391 palignr $11, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 11 bytes */
3392 movaps %xmm2, (%rdx)
3393 movaps 21(%rcx), %xmm2	/* aligned load: %rcx + 21 must be 16-byte aligned */
3394 lea 16(%rsi), %rsi
3395 sub $16, %r8
3396 jbe L(StrncpyExit11)	/* unsigned test: taken when %r8 was <= 16 */
3397 palignr $11, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3398 movaps %xmm2, 16(%rdx)
3399 lea 16(%rsi), %rsi
3400 sub $16, %r8
3401 jbe L(StrncpyExit11)
3402 movaps %xmm4, 32(%rdx)
3403 lea 16(%rsi), %rsi
3404 sub $16, %r8
3405 jbe L(StrncpyExit11)
3406 movaps %xmm5, 48(%rdx)
3407 lea 16(%rsi), %rsi
3408 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3409
/* StrncpyExit11: advance %rdx and %rcx by %rsi + 5, copy the 8 bytes
   that end at the new %rcx to just below the new %rdx (three bytes more
   than the 5-byte advance), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3410L(StrncpyExit11):
3411 lea 5(%rdx, %rsi), %rdx
3412 lea 5(%rcx, %rsi), %rcx
3413 mov -8(%rcx), %rax	/* %rax is scratch here */
3414 xor %rsi, %rsi	/* %rsi = 0 */
3415 mov %rax, -8(%rdx)
3416 jmp L(CopyFrom1To16BytesCase3)
3417
3418 .p2align 4
/* StrncpyLeave12: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit12.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $12; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3419L(StrncpyLeave12):
3420 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3421 add $48, %r8
3422 jle L(StrncpyExit12)	/* signed test: taken when %r8 + 48 <= 0 */
3423 palignr $12, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 12 bytes */
3424 movaps %xmm2, (%rdx)
3425 movaps 20(%rcx), %xmm2	/* aligned load: %rcx + 20 must be 16-byte aligned */
3426 lea 16(%rsi), %rsi
3427 sub $16, %r8
3428 jbe L(StrncpyExit12)	/* unsigned test: taken when %r8 was <= 16 */
3429 palignr $12, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3430 movaps %xmm2, 16(%rdx)
3431 lea 16(%rsi), %rsi
3432 sub $16, %r8
3433 jbe L(StrncpyExit12)
3434 movaps %xmm4, 32(%rdx)
3435 lea 16(%rsi), %rsi
3436 sub $16, %r8
3437 jbe L(StrncpyExit12)
3438 movaps %xmm5, 48(%rdx)
3439 lea 16(%rsi), %rsi
3440 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3441
/* StrncpyExit12: advance %rdx and %rcx by %rsi + 4, copy the 4 bytes
   that end at the new %rcx to just below the new %rdx, clear %rsi and
   continue at CopyFrom1To16BytesCase3.  */
3442L(StrncpyExit12):
3443 lea 4(%rdx, %rsi), %rdx
3444 lea 4(%rcx, %rsi), %rcx
3445 mov -4(%rcx), %eax	/* %eax is scratch here */
3446 xor %rsi, %rsi	/* %rsi = 0 */
3447 mov %eax, -4(%rdx)
3448 jmp L(CopyFrom1To16BytesCase3)
3449
3450 .p2align 4
/* StrncpyLeave13: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit13.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $13; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3451L(StrncpyLeave13):
3452 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3453 add $48, %r8
3454 jle L(StrncpyExit13)	/* signed test: taken when %r8 + 48 <= 0 */
3455 palignr $13, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 13 bytes */
3456 movaps %xmm2, (%rdx)
3457 movaps 19(%rcx), %xmm2	/* aligned load: %rcx + 19 must be 16-byte aligned */
3458 lea 16(%rsi), %rsi
3459 sub $16, %r8
3460 jbe L(StrncpyExit13)	/* unsigned test: taken when %r8 was <= 16 */
3461 palignr $13, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3462 movaps %xmm2, 16(%rdx)
3463 lea 16(%rsi), %rsi
3464 sub $16, %r8
3465 jbe L(StrncpyExit13)
3466 movaps %xmm4, 32(%rdx)
3467 lea 16(%rsi), %rsi
3468 sub $16, %r8
3469 jbe L(StrncpyExit13)
3470 movaps %xmm5, 48(%rdx)
3471 lea 16(%rsi), %rsi
3472 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3473
/* StrncpyExit13: advance %rdx and %rcx by %rsi + 3, copy the 4 bytes
   that end at the new %rcx to just below the new %rdx (one byte more than
   the 3-byte advance), clear %rsi and continue at
   CopyFrom1To16BytesCase3.  */
3474L(StrncpyExit13):
3475 lea 3(%rdx, %rsi), %rdx
3476 lea 3(%rcx, %rsi), %rcx
3477 mov -4(%rcx), %eax	/* %eax is scratch here */
3478 xor %rsi, %rsi	/* %rsi = 0 */
3479 mov %eax, -4(%rdx)
3480 jmp L(CopyFrom1To16BytesCase3)
3481
3482 .p2align 4
/* StrncpyLeave14: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit14.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $14; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3483L(StrncpyLeave14):
3484 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3485 add $48, %r8
3486 jle L(StrncpyExit14)	/* signed test: taken when %r8 + 48 <= 0 */
3487 palignr $14, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 14 bytes */
3488 movaps %xmm2, (%rdx)
3489 movaps 18(%rcx), %xmm2	/* aligned load: %rcx + 18 must be 16-byte aligned */
3490 lea 16(%rsi), %rsi
3491 sub $16, %r8
3492 jbe L(StrncpyExit14)	/* unsigned test: taken when %r8 was <= 16 */
3493 palignr $14, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3494 movaps %xmm2, 16(%rdx)
3495 lea 16(%rsi), %rsi
3496 sub $16, %r8
3497 jbe L(StrncpyExit14)
3498 movaps %xmm4, 32(%rdx)
3499 lea 16(%rsi), %rsi
3500 sub $16, %r8
3501 jbe L(StrncpyExit14)
3502 movaps %xmm5, 48(%rdx)
3503 lea 16(%rsi), %rsi
3504 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3505
/* StrncpyExit14: advance %rdx and %rcx by %rsi + 2, copy the 2 bytes
   that end at the new %rcx to just below the new %rdx, clear %rsi and
   continue at CopyFrom1To16BytesCase3.  */
3506L(StrncpyExit14):
3507 lea 2(%rdx, %rsi), %rdx
3508 lea 2(%rcx, %rsi), %rcx
3509 movw -2(%rcx), %ax	/* %ax is scratch here */
3510 xor %rsi, %rsi	/* %rsi = 0 */
3511 movw %ax, -2(%rdx)
3512 jmp L(CopyFrom1To16BytesCase3)
3513
3514 .p2align 4
/* StrncpyLeave15: store up to four more 16-byte chunks at %rdx while the
   count in %r8 lasts, then fall through into StrncpyExit15.  After each
   store %rsi grows by 16 and %r8 shrinks by 16.  The first two chunks are
   spliced with palignr $15; the last two are stored straight from %xmm4
   and %xmm5.
   NOTE(review): %xmm1..%xmm5 are presumably loaded (and %xmm4/%xmm5
   already realigned) by the loop that jumps here, and %r8 is presumably
   the remaining strncpy count biased by that loop -- confirm.  */
3515L(StrncpyLeave15):
3516 movaps %xmm2, %xmm3	/* keep a copy; palignr below overwrites %xmm2 */
3517 add $48, %r8
3518 jle L(StrncpyExit15)	/* signed test: taken when %r8 + 48 <= 0 */
3519 palignr $15, %xmm1, %xmm2	/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) >> 15 bytes */
3520 movaps %xmm2, (%rdx)
3521 movaps 17(%rcx), %xmm2	/* aligned load: %rcx + 17 must be 16-byte aligned */
3522 lea 16(%rsi), %rsi
3523 sub $16, %r8
3524 jbe L(StrncpyExit15)	/* unsigned test: taken when %r8 was <= 16 */
3525 palignr $15, %xmm3, %xmm2	/* same splice, low part from the saved copy */
3526 movaps %xmm2, 16(%rdx)
3527 lea 16(%rsi), %rsi
3528 sub $16, %r8
3529 jbe L(StrncpyExit15)
3530 movaps %xmm4, 32(%rdx)
3531 lea 16(%rsi), %rsi
3532 sub $16, %r8
3533 jbe L(StrncpyExit15)
3534 movaps %xmm5, 48(%rdx)
3535 lea 16(%rsi), %rsi
3536 lea -16(%r8), %r8	/* %r8 -= 16 without modifying flags */
3537
/* StrncpyExit15: advance %rdx and %rcx by %rsi + 1, copy the single
   byte just below the new %rcx to just below the new %rdx (moved through
   %ah), clear %rsi and continue at CopyFrom1To16BytesCase3.  */
3538L(StrncpyExit15):
3539 lea 1(%rdx, %rsi), %rdx
3540 lea 1(%rcx, %rsi), %rcx
3541 movb -1(%rcx), %ah	/* %ah is scratch here */
3542 xor %rsi, %rsi	/* %rsi = 0 */
3543 movb %ah, -1(%rdx)
3544 jmp L(CopyFrom1To16BytesCase3)
3545
3546# endif
3547# ifndef USE_AS_STRCAT
3548END (STRCPY)
3549# endif
3550#endif
3551

source code of glibc/sysdeps/x86_64/multiarch/strcpy-ssse3.S