1/* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#if IS_IN (libc)
20
/* When USE_AS_STRCAT is not defined: include sysdep.h and, unless STRCPY
   is already defined, give the entry point its default name.  */
21# ifndef USE_AS_STRCAT
22# include <sysdep.h>
23
24# ifndef STRCPY
25# define STRCPY __strcpy_sse2_unaligned
26# endif
27
28# endif
29
/* JMPTBL (I, B): jump-table entry, expressed as the offset of label I
   from the base label B.  */
30# define JMPTBL(I, B) I - B
/* BRANCH_TO_JMPTBL_ENTRY (TABLE, INDEX, SCALE): load the 32-bit entry at
   TABLE + INDEX * SCALE, sign-extend it, add the address of TABLE and
   jump to the result.  Overwrites %r11 and %rcx.  */
31# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
32 lea TABLE(%rip), %r11; \
33 movslq (%r11, INDEX, SCALE), %rcx; \
34 lea (%r11, %rcx), %rcx; \
35 _CET_NOTRACK jmp *%rcx
36
/* Entry point; the ENTRY prologue is only emitted when USE_AS_STRCAT is
   not defined.
   In:  %rdi = destination, %rsi = source; with USE_AS_STRNCPY the byte
	count arrives in %RDX_LP and is kept in %r8 from here on.
   Out: %rax = destination, set here unless USE_AS_STPCPY is defined (the
	exit stubs set %rax in that case).  */
37# ifndef USE_AS_STRCAT
38
39.text
40ENTRY (STRCPY)
41# ifdef USE_AS_STRNCPY
42 mov %RDX_LP, %R8_LP
43 test %R8_LP, %R8_LP
44 jz L(ExitZero) /* count == 0 */
45# endif
46 mov %rsi, %rcx
47# ifndef USE_AS_STPCPY
48 mov %rdi, %rax /* save result */
49# endif
50
51# endif
52
/* Choose the start-up path from the source offset within its 64-byte
   block: offsets <= 32 use L(SourceStringAlignmentLess32).  */
53 and $63, %rcx
54 cmp $32, %rcx
55 jbe L(SourceStringAlignmentLess32)
56
/* Offset > 32: round %rsi down to a 16-byte boundary and keep the
   distance from there to the string start in %rcx.  */
57 and $-16, %rsi
58 and $15, %rcx
59 pxor %xmm0, %xmm0
60 pxor %xmm1, %xmm1
61
/* NUL mask of the first aligned chunk; the shift discards the bits that
   belong to bytes before the string start.  */
62 pcmpeqb (%rsi), %xmm1
63 pmovmskb %xmm1, %rdx
64 shr %cl, %rdx
65
/* strncpy variants: %r10 = threshold for the bytes of this chunk
   (16 - %rcx or 17 - %rcx depending on the variant); leave if the count
   in %r8 does not exceed it.  */
66# ifdef USE_AS_STRNCPY
67# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
68 mov $16, %r10
69 sub %rcx, %r10
70 cmp %r10, %r8
71# else
72 mov $17, %r10
73 sub %rcx, %r10
74 cmp %r10, %r8
75# endif
76 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
77# endif
78 test %rdx, %rdx
79 jnz L(CopyFrom1To16BytesTail) /* NUL in the first chunk */
80
/* Same check for the second aligned chunk; %xmm0 receives the compare
   result, which is all zero again if no NUL byte is found.  */
81 pcmpeqb 16(%rsi), %xmm0
82 pmovmskb %xmm0, %rdx
83
84# ifdef USE_AS_STRNCPY
85 add $16, %r10
86 cmp %r10, %r8
87 jbe L(CopyFrom1To32BytesCase2OrCase3)
88# endif
89 test %rdx, %rdx
90 jnz L(CopyFrom1To32Bytes)
91
/* No NUL so far: store the first 16 bytes of the string and fall through
   to L(Unalign16Both).  */
92 movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
93 movdqu %xmm1, (%rdi)
94
95/* Main 16-byte copy path: the source is read with aligned loads, the destination is written with unaligned stores.  */
/* L(Unalign16Both): copy in 16-byte steps, reading the source with
   aligned loads at (%rsi, %rcx) and writing the destination with
   unaligned stores at (%rdi, %rcx).
   On entry: %rsi is rounded down to 16 bytes, %rcx is the distance from
   %rsi to the string start, the first 16 bytes of the string have already
   been stored and %xmm0 is zero.
   Each step stores the chunk loaded by the previous step, tests the newly
   loaded chunk for a NUL byte and advances %rcx by 16.  A step leaves
   through a tail label when a NUL byte is found or, in the strncpy
   variants, when the count in %r8 runs out.  %xmm0 receives every compare
   result and is therefore still zero whenever execution falls through.  */
96 .p2align 4
97L(Unalign16Both):
98 sub %rcx, %rdi /* (%rdi, N) now pairs with (%rsi, N) */
99# ifdef USE_AS_STRNCPY
100 add %rcx, %r8
101 sbb %rcx, %rcx /* %rcx = -1 if the add carried, else 0 */
102 or %rcx, %r8 /* saturate %r8 instead of wrapping */
103# endif
104 mov $16, %rcx
105 movdqa (%rsi, %rcx), %xmm1
106 movaps 16(%rsi, %rcx), %xmm2
107 movdqu %xmm1, (%rdi, %rcx)
108 pcmpeqb %xmm2, %xmm0
109 pmovmskb %xmm0, %rdx
110 add $16, %rcx
111# ifdef USE_AS_STRNCPY
112 sub $48, %r8
113 jbe L(CopyFrom1To16BytesCase2OrCase3)
114# endif
115 test %rdx, %rdx
116# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
117 jnz L(CopyFrom1To16BytesUnalignedXmm2)
118# else
119 jnz L(CopyFrom1To16Bytes)
120# endif
121
/* Step: store %xmm2, test the next chunk in %xmm3.  */
122 movaps 16(%rsi, %rcx), %xmm3
123 movdqu %xmm2, (%rdi, %rcx)
124 pcmpeqb %xmm3, %xmm0
125 pmovmskb %xmm0, %rdx
126 add $16, %rcx
127# ifdef USE_AS_STRNCPY
128 sub $16, %r8
129 jbe L(CopyFrom1To16BytesCase2OrCase3)
130# endif
131 test %rdx, %rdx
132# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
133 jnz L(CopyFrom1To16BytesUnalignedXmm3)
134# else
135 jnz L(CopyFrom1To16Bytes)
136# endif
137
/* Step: store %xmm3, test the next chunk in %xmm4.  */
138 movaps 16(%rsi, %rcx), %xmm4
139 movdqu %xmm3, (%rdi, %rcx)
140 pcmpeqb %xmm4, %xmm0
141 pmovmskb %xmm0, %rdx
142 add $16, %rcx
143# ifdef USE_AS_STRNCPY
144 sub $16, %r8
145 jbe L(CopyFrom1To16BytesCase2OrCase3)
146# endif
147 test %rdx, %rdx
148# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
149 jnz L(CopyFrom1To16BytesUnalignedXmm4)
150# else
151 jnz L(CopyFrom1To16Bytes)
152# endif
153
/* Step: store %xmm4, test the next chunk in %xmm1.  */
154 movaps 16(%rsi, %rcx), %xmm1
155 movdqu %xmm4, (%rdi, %rcx)
156 pcmpeqb %xmm1, %xmm0
157 pmovmskb %xmm0, %rdx
158 add $16, %rcx
159# ifdef USE_AS_STRNCPY
160 sub $16, %r8
161 jbe L(CopyFrom1To16BytesCase2OrCase3)
162# endif
163 test %rdx, %rdx
164# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
165 jnz L(CopyFrom1To16BytesUnalignedXmm1)
166# else
167 jnz L(CopyFrom1To16Bytes)
168# endif
169
/* Step: store %xmm1, test the next chunk in %xmm2.  */
170 movaps 16(%rsi, %rcx), %xmm2
171 movdqu %xmm1, (%rdi, %rcx)
172 pcmpeqb %xmm2, %xmm0
173 pmovmskb %xmm0, %rdx
174 add $16, %rcx
175# ifdef USE_AS_STRNCPY
176 sub $16, %r8
177 jbe L(CopyFrom1To16BytesCase2OrCase3)
178# endif
179 test %rdx, %rdx
180# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
181 jnz L(CopyFrom1To16BytesUnalignedXmm2)
182# else
183 jnz L(CopyFrom1To16Bytes)
184# endif
185
/* Step: store %xmm2, test the next chunk in %xmm3.  */
186 movaps 16(%rsi, %rcx), %xmm3
187 movdqu %xmm2, (%rdi, %rcx)
188 pcmpeqb %xmm3, %xmm0
189 pmovmskb %xmm0, %rdx
190 add $16, %rcx
191# ifdef USE_AS_STRNCPY
192 sub $16, %r8
193 jbe L(CopyFrom1To16BytesCase2OrCase3)
194# endif
195 test %rdx, %rdx
196# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
197 jnz L(CopyFrom1To16BytesUnalignedXmm3)
198# else
199 jnz L(CopyFrom1To16Bytes)
200# endif
201
/* No NUL byte in the unrolled steps: store the last chunk, round the
   source pointer down to a 64-byte boundary, shift %rdi by the same
   distance and adjust the count in %r8 to match.  Falls through into the
   64-byte loop.  */
202 movdqu %xmm3, (%rdi, %rcx)
203 mov %rsi, %rdx
204 lea 16(%rsi, %rcx), %rsi
205 and $-0x40, %rsi
206 sub %rsi, %rdx /* %rdx = old %rsi - new %rsi */
207 sub %rdx, %rdi
208# ifdef USE_AS_STRNCPY
209 lea 128(%r8, %rdx), %r8
210# endif
/* L(Unaligned64Loop): first pass of the 64-byte loop.  Load the four
   aligned 16-byte chunks at (%rsi); copies are kept in %xmm4, %xmm5,
   %xmm6 and %xmm7 for storing later.  pminub folds the four chunks into
   one vector whose bytes are zero wherever any chunk has a zero byte, so
   the mask in %rdx is nonzero iff the 64 bytes contain a NUL.  Expects
   %xmm0 to be zero.  Nothing is stored here.  */
211L(Unaligned64Loop):
212 movaps (%rsi), %xmm2
213 movaps %xmm2, %xmm4
214 movaps 16(%rsi), %xmm5
215 movaps 32(%rsi), %xmm3
216 movaps %xmm3, %xmm6
217 movaps 48(%rsi), %xmm7
218 pminub %xmm5, %xmm2
219 pminub %xmm7, %xmm3
220 pminub %xmm2, %xmm3
221 pcmpeqb %xmm0, %xmm3
222 pmovmskb %xmm3, %rdx
223# ifdef USE_AS_STRNCPY
224 sub $64, %r8
225 jbe L(UnalignedLeaveCase2OrCase3) /* count runs out in this block */
226# endif
227 test %rdx, %rdx
228 jnz L(Unaligned64Leave)
229
229
/* L(Unaligned64Loop_start): steady-state loop body.  Advance both
   pointers by 64, store the previous 64 bytes (held in %xmm4..%xmm7) just
   below the new %rdi and, interleaved with those stores, load and test
   the next 64 bytes exactly as L(Unaligned64Loop) does.  Repeats while no
   NUL byte is found; falls through to L(Unaligned64Leave) otherwise.  */
230L(Unaligned64Loop_start):
231 add $64, %rdi
232 add $64, %rsi
233 movdqu %xmm4, -64(%rdi)
234 movaps (%rsi), %xmm2
235 movdqa %xmm2, %xmm4
236 movdqu %xmm5, -48(%rdi)
237 movaps 16(%rsi), %xmm5
238 pminub %xmm5, %xmm2
239 movaps 32(%rsi), %xmm3
240 movdqu %xmm6, -32(%rdi)
241 movaps %xmm3, %xmm6
242 movdqu %xmm7, -16(%rdi)
243 movaps 48(%rsi), %xmm7
244 pminub %xmm7, %xmm3
245 pminub %xmm2, %xmm3
246 pcmpeqb %xmm0, %xmm3
247 pmovmskb %xmm3, %rdx
248# ifdef USE_AS_STRNCPY
249 sub $64, %r8
250 jbe L(UnalignedLeaveCase2OrCase3)
251# endif
252 test %rdx, %rdx
253 jz L(Unaligned64Loop_start)
254
/* L(Unaligned64Leave): the 64 bytes held in %xmm4..%xmm7 contain a NUL
   byte; find which 16-byte chunk holds the first one.  Chunks 0, 1 and 2
   branch to L(CopyFrom1To16BytesUnaligned_0/_16/_32); chunk 3 is handled
   inline below.  */
255L(Unaligned64Leave):
256 pxor %xmm1, %xmm1
257
258 pcmpeqb %xmm4, %xmm0
259 pcmpeqb %xmm5, %xmm1
260 pmovmskb %xmm0, %rdx
261 pmovmskb %xmm1, %rcx
262 test %rdx, %rdx
263 jnz L(CopyFrom1To16BytesUnaligned_0)
264 test %rcx, %rcx
265 jnz L(CopyFrom1To16BytesUnaligned_16)
266
/* Both compare results above were all zero, so %xmm0 and %xmm1 can be
   reused as zero vectors for chunks 2 and 3.  */
267 pcmpeqb %xmm6, %xmm0
268 pcmpeqb %xmm7, %xmm1
269 pmovmskb %xmm0, %rdx
270 pmovmskb %xmm1, %rcx
271 test %rdx, %rdx
272 jnz L(CopyFrom1To16BytesUnaligned_32)
273
/* NUL is in chunk 3: %rdx = its index within that chunk; store chunks
   0-2 whole.  */
274 bsf %rcx, %rdx
275 movdqu %xmm4, (%rdi)
276 movdqu %xmm5, 16(%rdi)
277 movdqu %xmm6, 32(%rdi)
278# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
279# ifdef USE_AS_STPCPY
280 lea 48(%rdi, %rdx), %rax /* address of the NUL in the destination */
281# endif
/* strncpy: store chunk 3 whole, then hand the position just past the NUL
   (%rdi) and the adjusted count (%r8) to the zero-fill code.  */
282 movdqu %xmm7, 48(%rdi)
283 add $15, %r8
284 sub %rdx, %r8
285 lea 49(%rdi, %rdx), %rdi
286 jmp L(StrncpyFillTailWithZero)
287# else
/* Otherwise point both registers at chunk 3 and dispatch on the NUL
   index through L(ExitTable).  */
288 add $48, %rsi
289 add $48, %rdi
290 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
291# endif
292
293/* Source offset within its 64-byte block is at most 32 (checked on entry): the first 32 bytes are read with unaligned loads from the original source address.  */
294
/* L(SourceStringAlignmentLess32): start-up path taken when
   (source & 63) <= 32.  Loads the first two 16-byte chunks directly from
   the unmodified %rsi with unaligned loads and checks each for a NUL
   byte (and, in the strncpy variants, for the count in %r8 running out).
   If neither check fires, the first 16 bytes have been stored and the
   code joins L(Unalign16Both) with %rsi rounded down to 16 bytes and
   %rcx = source & 15.  */
295L(SourceStringAlignmentLess32):
296 pxor %xmm0, %xmm0
297 movdqu (%rsi), %xmm1
298 movdqu 16(%rsi), %xmm2
299 pcmpeqb %xmm1, %xmm0
300 pmovmskb %xmm0, %rdx
301
302# ifdef USE_AS_STRNCPY
303# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
304 cmp $16, %r8
305# else
306 cmp $17, %r8
307# endif
308 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
309# endif
310 test %rdx, %rdx
311 jnz L(CopyFrom1To16BytesTail1)
312
/* First chunk has no NUL (so %xmm0 is zero again): store it and test the
   second chunk.  */
313 pcmpeqb %xmm2, %xmm0
314 movdqu %xmm1, (%rdi)
315 pmovmskb %xmm0, %rdx
316
317# ifdef USE_AS_STRNCPY
318# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
319 cmp $32, %r8
320# else
321 cmp $33, %r8
322# endif
323 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
324# endif
325 test %rdx, %rdx
326 jnz L(CopyFrom1To32Bytes1)
327
328 and $-16, %rsi
329 and $15, %rcx
330 jmp L(Unalign16Both)
331
332/*------End of main part with loops---------------------*/
333
334/* Case1: a NUL byte was found before any strncpy length limit took effect.  */
335
/* L(CopyFrom1To16Bytes): only built when USE_AS_STRNCPY is undefined or
   USE_AS_STRCAT is defined.  %rdx holds the NUL mask of the chunk at
   offset %rcx: advance both pointers to that chunk, turn the mask into
   the index of the first NUL and dispatch through L(ExitTable).  */
336# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
337 .p2align 4
338L(CopyFrom1To16Bytes):
339 add %rcx, %rdi
340 add %rcx, %rsi
341 bsf %rdx, %rdx
342 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
343# endif
/* L(CopyFrom1To16BytesTail): NUL found in the first aligned chunk.
   Restore %rsi to the string start (%rsi + %rcx); %rdi is untouched.
   %rdx = index of the first NUL, then dispatch through L(ExitTable).  */
344 .p2align 4
345L(CopyFrom1To16BytesTail):
346 add %rcx, %rsi
347 bsf %rdx, %rdx
348 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
349
/* L(CopyFrom1To32Bytes1): NUL found in the second unaligned chunk; step
   both pointers over the first 16 bytes (and take them off %r8 in the
   strncpy build), then fall into L(CopyFrom1To16BytesTail1).
   L(CopyFrom1To16BytesTail1): %rdx = index of the first NUL in the chunk
   at (%rsi); dispatch through L(ExitTable).  */
350 .p2align 4
351L(CopyFrom1To32Bytes1):
352 add $16, %rsi
353 add $16, %rdi
354# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
355 sub $16, %r8
356# endif
357L(CopyFrom1To16BytesTail1):
358 bsf %rdx, %rdx
359 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
360
/* L(CopyFrom1To32Bytes): NUL found in the second aligned chunk.  Restore
   %rsi to the string start and compute the NUL index relative to it:
   bit index + 16 - %rcx.  Dispatch through L(ExitTable).  */
361 .p2align 4
362L(CopyFrom1To32Bytes):
363 bsf %rdx, %rdx
364 add %rcx, %rsi
365 add $16, %rdx
366 sub %rcx, %rdx
367 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
368
/* L(CopyFrom1To16BytesUnaligned_0): leaving the 64-byte loop with the
   first NUL in chunk 0 (%xmm4); %rdx = its index.
   strncpy build: store the chunk whole, set %rax to the NUL's address
   for stpcpy, adjust %r8, point %rdi just past the NUL and go to the
   zero-fill code.  Other builds: dispatch through L(ExitTable).  */
369 .p2align 4
370L(CopyFrom1To16BytesUnaligned_0):
371 bsf %rdx, %rdx
372# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
373# ifdef USE_AS_STPCPY
374 lea (%rdi, %rdx), %rax
375# endif
376 movdqu %xmm4, (%rdi)
377 add $63, %r8
378 sub %rdx, %r8
379 lea 1(%rdi, %rdx), %rdi
380 jmp L(StrncpyFillTailWithZero)
381# else
382 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
383# endif
384
/* L(CopyFrom1To16BytesUnaligned_16): leaving the 64-byte loop with the
   first NUL in chunk 1 (%xmm5); its mask is in %rcx.  Chunk 0 is stored
   whole.  strncpy build: store chunk 1 whole, set %rax for stpcpy,
   adjust %r8, point %rdi just past the NUL and go to the zero-fill code.
   Other builds: advance both pointers by 16 and dispatch through
   L(ExitTable).  */
385 .p2align 4
386L(CopyFrom1To16BytesUnaligned_16):
387 bsf %rcx, %rdx
388 movdqu %xmm4, (%rdi)
389# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
390# ifdef USE_AS_STPCPY
391 lea 16(%rdi, %rdx), %rax
392# endif
393 movdqu %xmm5, 16(%rdi)
394 add $47, %r8
395 sub %rdx, %r8
396 lea 17(%rdi, %rdx), %rdi
397 jmp L(StrncpyFillTailWithZero)
398# else
399 add $16, %rsi
400 add $16, %rdi
401 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
402# endif
403
/* L(CopyFrom1To16BytesUnaligned_32): leaving the 64-byte loop with the
   first NUL in chunk 2 (%xmm6); its mask is in %rdx.  Chunks 0 and 1 are
   stored whole.  strncpy build: store chunk 2 whole, set %rax for stpcpy,
   adjust %r8, point %rdi just past the NUL and go to the zero-fill code.
   Other builds: advance both pointers by 32 and dispatch through
   L(ExitTable).  */
404 .p2align 4
405L(CopyFrom1To16BytesUnaligned_32):
406 bsf %rdx, %rdx
407 movdqu %xmm4, (%rdi)
408 movdqu %xmm5, 16(%rdi)
409# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
410# ifdef USE_AS_STPCPY
411 lea 32(%rdi, %rdx), %rax
412# endif
413 movdqu %xmm6, 32(%rdi)
414 add $31, %r8
415 sub %rdx, %r8
416 lea 33(%rdi, %rdx), %rdi
417 jmp L(StrncpyFillTailWithZero)
418# else
419 add $32, %rsi
420 add $32, %rdi
421 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
422# endif
423
424# ifdef USE_AS_STRNCPY
425# ifndef USE_AS_STRCAT
/* L(CopyFrom1To16BytesUnalignedXmm6): store %xmm6 at (%rdi, %rcx), then
   continue at L(CopyFrom1To16BytesXmmExit).
   NOTE(review): no jump to this label is visible in this part of the
   file — confirm whether it is still used.  */
426 .p2align 4
427L(CopyFrom1To16BytesUnalignedXmm6):
428 movdqu %xmm6, (%rdi, %rcx)
429 jmp L(CopyFrom1To16BytesXmmExit)
430
/* L(CopyFrom1To16BytesUnalignedXmm5): store %xmm5 at (%rdi, %rcx), then
   continue at L(CopyFrom1To16BytesXmmExit).
   NOTE(review): no jump to this label is visible in this part of the
   file — confirm whether it is still used.  */
431 .p2align 4
432L(CopyFrom1To16BytesUnalignedXmm5):
433 movdqu %xmm5, (%rdi, %rcx)
434 jmp L(CopyFrom1To16BytesXmmExit)
435
/* L(CopyFrom1To16BytesUnalignedXmm4): strncpy-build target of the
   16-byte step that found a NUL in %xmm4.  Store that chunk whole at
   (%rdi, %rcx), then continue at L(CopyFrom1To16BytesXmmExit).  */
436 .p2align 4
437L(CopyFrom1To16BytesUnalignedXmm4):
438 movdqu %xmm4, (%rdi, %rcx)
439 jmp L(CopyFrom1To16BytesXmmExit)
440
/* L(CopyFrom1To16BytesUnalignedXmm3): strncpy-build target of the
   16-byte steps that found a NUL in %xmm3.  Store that chunk whole at
   (%rdi, %rcx), then continue at L(CopyFrom1To16BytesXmmExit).  */
441 .p2align 4
442L(CopyFrom1To16BytesUnalignedXmm3):
443 movdqu %xmm3, (%rdi, %rcx)
444 jmp L(CopyFrom1To16BytesXmmExit)
445
/* L(CopyFrom1To16BytesUnalignedXmm1): strncpy-build target of the
   16-byte step that found a NUL in %xmm1.  Store that chunk whole at
   (%rdi, %rcx), then continue at L(CopyFrom1To16BytesXmmExit).  */
446 .p2align 4
447L(CopyFrom1To16BytesUnalignedXmm1):
448 movdqu %xmm1, (%rdi, %rcx)
449 jmp L(CopyFrom1To16BytesXmmExit)
450# endif
451
/* L(CopyFrom1To16BytesExit): shared by the Case2 labels when the NUL
   index in %rdx is below the remaining count.  Dispatch on %rdx through
   L(ExitTable).  */
452 .p2align 4
453L(CopyFrom1To16BytesExit):
454 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
455
456/* Case2: the strncpy length limit falls in a chunk that also contains a NUL byte.  */
457
/* L(CopyFrom1To16BytesCase2): reached from the 16-byte steps when the
   count ran out in a chunk that has a NUL.  Undo the step's bias on %r8
   (+16), move both pointers to the chunk and compare the NUL index with
   the remaining count: NUL first -> L(ExitTable) via
   L(CopyFrom1To16BytesExit); otherwise copy exactly %r8 bytes through
   L(ExitStrncpyTable).  */
458 .p2align 4
459L(CopyFrom1To16BytesCase2):
460 add $16, %r8
461 add %rcx, %rdi
462 add %rcx, %rsi
463 bsf %rdx, %rdx
464 cmp %r8, %rdx
465 jb L(CopyFrom1To16BytesExit)
466 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
467
/* L(CopyFrom1To32BytesCase2): count ran out within the first two aligned
   chunks and the second one has a NUL.  Restore %rsi to the string start
   and compute the NUL index relative to it (bit index + 16 - %rcx).  If
   the index is below %r8, dispatch on it through L(ExitTable); otherwise
   dispatch on %r8 through L(ExitStrncpyTable).  */
468 .p2align 4
469L(CopyFrom1To32BytesCase2):
470 add %rcx, %rsi
471 bsf %rdx, %rdx
472 add $16, %rdx
473 sub %rcx, %rdx
474 cmp %r8, %rdx
475 jb L(CopyFrom1To16BytesExit)
476 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
477
/* L(CopyFrom1To16BytesTailCase2): count ran out within the first aligned
   chunk, which also has a NUL.  Restore %rsi to the string start; if the
   NUL index is below %r8 dispatch on it through L(ExitTable), otherwise
   dispatch on %r8 through L(ExitStrncpyTable).  */
478L(CopyFrom1To16BytesTailCase2):
479 add %rcx, %rsi
480 bsf %rdx, %rdx
481 cmp %r8, %rdx
482 jb L(CopyFrom1To16BytesExit)
483 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
484
/* L(CopyFrom1To16BytesTail1Case2): same decision as the other Case2
   labels, with %rsi and %rdi already pointing at the chunk: if the NUL
   index is below %r8 dispatch on it through L(ExitTable), otherwise
   dispatch on %r8 through L(ExitStrncpyTable).  */
485L(CopyFrom1To16BytesTail1Case2):
486 bsf %rdx, %rdx
487 cmp %r8, %rdx
488 jb L(CopyFrom1To16BytesExit)
489 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
490
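491/* Case2 or Case3: the length limit was reached; take Case2 if the chunk has a NUL byte, else Case3 (limit reached with no NUL byte).  */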
492
/* L(CopyFrom1To16BytesCase2OrCase3): the count ran out during a 16-byte
   step.  A nonzero NUL mask in %rdx selects L(CopyFrom1To16BytesCase2).
   L(CopyFrom1To16BytesCase3): no NUL in the chunk — undo the step's bias
   on %r8 (+16), move both pointers to the chunk and copy exactly %r8
   bytes through L(ExitStrncpyTable).  */
493 .p2align 4
494L(CopyFrom1To16BytesCase2OrCase3):
495 test %rdx, %rdx
496 jnz L(CopyFrom1To16BytesCase2)
497L(CopyFrom1To16BytesCase3):
498 add $16, %r8
499 add %rcx, %rdi
500 add %rcx, %rsi
501 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
502
/* L(CopyFrom1To32BytesCase2OrCase3): the count ran out within the first
   two aligned chunks.  NUL in the second chunk ->
   L(CopyFrom1To32BytesCase2); otherwise restore %rsi to the string start
   and copy exactly %r8 bytes through L(ExitStrncpyTable).  */
503 .p2align 4
504L(CopyFrom1To32BytesCase2OrCase3):
505 test %rdx, %rdx
506 jnz L(CopyFrom1To32BytesCase2)
507 add %rcx, %rsi
508 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
509
/* L(CopyFrom1To16BytesTailCase2OrCase3): the count ran out within the
   first aligned chunk.  NUL in that chunk ->
   L(CopyFrom1To16BytesTailCase2); otherwise restore %rsi to the string
   start and copy exactly %r8 bytes through L(ExitStrncpyTable).  */
510 .p2align 4
511L(CopyFrom1To16BytesTailCase2OrCase3):
512 test %rdx, %rdx
513 jnz L(CopyFrom1To16BytesTailCase2)
514 add %rcx, %rsi
515 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
516
/* L(CopyFrom1To32Bytes1Case2OrCase3): the count ran out in the second
   unaligned chunk; step both pointers and %r8 past the first 16 bytes
   and fall into the label below.
   L(CopyFrom1To16BytesTail1Case2OrCase3): NUL in the chunk ->
   L(CopyFrom1To16BytesTail1Case2); otherwise copy exactly %r8 bytes
   through L(ExitStrncpyTable).  */
517 .p2align 4
518L(CopyFrom1To32Bytes1Case2OrCase3):
519 add $16, %rdi
520 add $16, %rsi
521 sub $16, %r8
522L(CopyFrom1To16BytesTail1Case2OrCase3):
523 test %rdx, %rdx
524 jnz L(CopyFrom1To16BytesTail1Case2)
525 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
526
527# endif
528
529/*------------End of the labels that dispatch copies of 1-16 and 1-32 bytes----*/
530
/* L(Exit1): write one byte, taken from %dh, to (%rdi).
   NOTE(review): relies on %dh being zero — the visible dispatches pass a
   small byte index in %rdx; confirm for any other way in.
   stpcpy builds: %rax = %rdi.  strncpy builds (not strcat): %r8 -= 1,
   %rdi += 1, then L(StrncpyFillTailWithZero) if %r8 != 0 (jnz uses the
   flags from sub; lea leaves them alone).  */
531 .p2align 4
532L(Exit1):
533 mov %dh, (%rdi)
534# ifdef USE_AS_STPCPY
535 lea (%rdi), %rax
536# endif
537# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
538 sub $1, %r8
539 lea 1(%rdi), %rdi
540 jnz L(StrncpyFillTailWithZero)
541# endif
542 ret
543
/* L(Exit2): copy 2 bytes from (%rsi) to (%rdi) with one 16-bit move.
   stpcpy builds: %rax = %rdi + 1.  strncpy builds (not strcat): %r8 -= 2,
   %rdi += 2, then L(StrncpyFillTailWithZero) if %r8 != 0 (jnz uses the
   flags from sub; lea leaves them alone).  */
544 .p2align 4
545L(Exit2):
546 mov (%rsi), %dx
547 mov %dx, (%rdi)
548# ifdef USE_AS_STPCPY
549 lea 1(%rdi), %rax
550# endif
551# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
552 sub $2, %r8
553 lea 2(%rdi), %rdi
554 jnz L(StrncpyFillTailWithZero)
555# endif
556 ret
557
/* L(Exit3): copy 2 bytes from (%rsi) to (%rdi), then write byte 2 from
   %dh.  NOTE(review): relies on %dh being zero — confirm.
   stpcpy builds: %rax = %rdi + 2.  strncpy builds (not strcat): %r8 -= 3,
   %rdi += 3, then L(StrncpyFillTailWithZero) if %r8 != 0 (jnz uses the
   flags from sub; lea leaves them alone).  */
558 .p2align 4
559L(Exit3):
560 mov (%rsi), %cx
561 mov %cx, (%rdi)
562 mov %dh, 2(%rdi)
563# ifdef USE_AS_STPCPY
564 lea 2(%rdi), %rax
565# endif
566# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
567 sub $3, %r8
568 lea 3(%rdi), %rdi
569 jnz L(StrncpyFillTailWithZero)
570# endif
571 ret
572
/* L(Exit4): copy 4 bytes from (%rsi) to (%rdi) with one 32-bit move.
   stpcpy builds: %rax = %rdi + 3.  strncpy builds (not strcat): %r8 -= 4,
   %rdi += 4, then L(StrncpyFillTailWithZero) if %r8 != 0 (jnz uses the
   flags from sub; lea leaves them alone).  */
573 .p2align 4
574L(Exit4):
575 mov (%rsi), %edx
576 mov %edx, (%rdi)
577# ifdef USE_AS_STPCPY
578 lea 3(%rdi), %rax
579# endif
580# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
581 sub $4, %r8
582 lea 4(%rdi), %rdi
583 jnz L(StrncpyFillTailWithZero)
584# endif
585 ret
586
/* L(Exit5): copy 4 bytes from (%rsi) to (%rdi) and write byte 4 from
   %dh.  NOTE(review): relies on %dh being zero — confirm.
   stpcpy builds: %rax = %rdi + 4.  strncpy builds (not strcat): %r8 -= 5,
   %rdi += 5, then L(StrncpyFillTailWithZero) if %r8 != 0 (jnz uses the
   flags from sub; lea leaves them alone).  */
587 .p2align 4
588L(Exit5):
589 mov (%rsi), %ecx
590 mov %dh, 4(%rdi)
591 mov %ecx, (%rdi)
592# ifdef USE_AS_STPCPY
593 lea 4(%rdi), %rax
594# endif
595# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
596 sub $5, %r8
597 lea 5(%rdi), %rdi
598 jnz L(StrncpyFillTailWithZero)
599# endif
600 ret
601
/* L(Exit6): copy 6 bytes from (%rsi) to (%rdi): 4 bytes at offset 0 and
   2 bytes at offset 4.
   stpcpy builds: %rax = %rdi + 5.  strncpy builds (not strcat): %r8 -= 6,
   %rdi += 6, then L(StrncpyFillTailWithZero) if %r8 != 0 (jnz uses the
   flags from sub; lea leaves them alone).  */
602 .p2align 4
603L(Exit6):
604 mov (%rsi), %ecx
605 mov 4(%rsi), %dx
606 mov %ecx, (%rdi)
607 mov %dx, 4(%rdi)
608# ifdef USE_AS_STPCPY
609 lea 5(%rdi), %rax
610# endif
611# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
612 sub $6, %r8
613 lea 6(%rdi), %rdi
614 jnz L(StrncpyFillTailWithZero)
615# endif
616 ret
617
/* L(Exit7): copy 7 bytes from (%rsi) to (%rdi) with two overlapping
   4-byte moves (offsets 0 and 3).
   stpcpy builds: %rax = %rdi + 6.  strncpy builds (not strcat): %r8 -= 7,
   %rdi += 7, then L(StrncpyFillTailWithZero) if %r8 != 0 (jnz uses the
   flags from sub; lea leaves them alone).  */
618 .p2align 4
619L(Exit7):
620 mov (%rsi), %ecx
621 mov 3(%rsi), %edx
622 mov %ecx, (%rdi)
623 mov %edx, 3(%rdi)
624# ifdef USE_AS_STPCPY
625 lea 6(%rdi), %rax
626# endif
627# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
628 sub $7, %r8
629 lea 7(%rdi), %rdi
630 jnz L(StrncpyFillTailWithZero)
631# endif
632 ret
633
/* L(Exit8): copy 8 bytes from (%rsi) to (%rdi) with one 64-bit move.
   stpcpy builds: %rax = %rdi + 7.  strncpy builds (not strcat): %r8 -= 8,
   %rdi += 8, then L(StrncpyFillTailWithZero) if %r8 != 0 (jnz uses the
   flags from sub; lea leaves them alone).  */
634 .p2align 4
635L(Exit8):
636 mov (%rsi), %rdx
637 mov %rdx, (%rdi)
638# ifdef USE_AS_STPCPY
639 lea 7(%rdi), %rax
640# endif
641# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
642 sub $8, %r8
643 lea 8(%rdi), %rdi
644 jnz L(StrncpyFillTailWithZero)
645# endif
646 ret
647
/* L(Exit9): copy 8 bytes from (%rsi) to (%rdi) and write byte 8 from
   %dh.  NOTE(review): relies on %dh being zero — confirm.
   stpcpy builds: %rax = %rdi + 8.  strncpy builds (not strcat): %r8 -= 9,
   %rdi += 9, then L(StrncpyFillTailWithZero) if %r8 != 0 (jnz uses the
   flags from sub; lea leaves them alone).  */
648 .p2align 4
649L(Exit9):
650 mov (%rsi), %rcx
651 mov %dh, 8(%rdi)
652 mov %rcx, (%rdi)
653# ifdef USE_AS_STPCPY
654 lea 8(%rdi), %rax
655# endif
656# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
657 sub $9, %r8
658 lea 9(%rdi), %rdi
659 jnz L(StrncpyFillTailWithZero)
660# endif
661 ret
662
/* L(Exit10): copy 10 bytes from (%rsi) to (%rdi): 8 bytes at offset 0
   and 2 bytes at offset 8.
   stpcpy builds: %rax = %rdi + 9.  strncpy builds (not strcat):
   %r8 -= 10, %rdi += 10, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
663 .p2align 4
664L(Exit10):
665 mov (%rsi), %rcx
666 mov 8(%rsi), %dx
667 mov %rcx, (%rdi)
668 mov %dx, 8(%rdi)
669# ifdef USE_AS_STPCPY
670 lea 9(%rdi), %rax
671# endif
672# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
673 sub $10, %r8
674 lea 10(%rdi), %rdi
675 jnz L(StrncpyFillTailWithZero)
676# endif
677 ret
678
/* L(Exit11): copy 11 bytes from (%rsi) to (%rdi): 8 bytes at offset 0
   and an overlapping 4 bytes at offset 7.
   stpcpy builds: %rax = %rdi + 10.  strncpy builds (not strcat):
   %r8 -= 11, %rdi += 11, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
679 .p2align 4
680L(Exit11):
681 mov (%rsi), %rcx
682 mov 7(%rsi), %edx
683 mov %rcx, (%rdi)
684 mov %edx, 7(%rdi)
685# ifdef USE_AS_STPCPY
686 lea 10(%rdi), %rax
687# endif
688# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
689 sub $11, %r8
690 lea 11(%rdi), %rdi
691 jnz L(StrncpyFillTailWithZero)
692# endif
693 ret
694
/* L(Exit12): copy 12 bytes from (%rsi) to (%rdi): 8 bytes at offset 0
   and 4 bytes at offset 8.
   stpcpy builds: %rax = %rdi + 11.  strncpy builds (not strcat):
   %r8 -= 12, %rdi += 12, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
695 .p2align 4
696L(Exit12):
697 mov (%rsi), %rcx
698 mov 8(%rsi), %edx
699 mov %rcx, (%rdi)
700 mov %edx, 8(%rdi)
701# ifdef USE_AS_STPCPY
702 lea 11(%rdi), %rax
703# endif
704# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
705 sub $12, %r8
706 lea 12(%rdi), %rdi
707 jnz L(StrncpyFillTailWithZero)
708# endif
709 ret
710
/* L(Exit13): copy 13 bytes from (%rsi) to (%rdi) with two overlapping
   8-byte moves (offsets 0 and 5).
   stpcpy builds: %rax = %rdi + 12.  strncpy builds (not strcat):
   %r8 -= 13, %rdi += 13, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
711 .p2align 4
712L(Exit13):
713 mov (%rsi), %rcx
714 mov 5(%rsi), %rdx
715 mov %rcx, (%rdi)
716 mov %rdx, 5(%rdi)
717# ifdef USE_AS_STPCPY
718 lea 12(%rdi), %rax
719# endif
720# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
721 sub $13, %r8
722 lea 13(%rdi), %rdi
723 jnz L(StrncpyFillTailWithZero)
724# endif
725 ret
726
/* L(Exit14): copy 14 bytes from (%rsi) to (%rdi) with two overlapping
   8-byte moves (offsets 0 and 6).
   stpcpy builds: %rax = %rdi + 13.  strncpy builds (not strcat):
   %r8 -= 14, %rdi += 14, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
727 .p2align 4
728L(Exit14):
729 mov (%rsi), %rcx
730 mov 6(%rsi), %rdx
731 mov %rcx, (%rdi)
732 mov %rdx, 6(%rdi)
733# ifdef USE_AS_STPCPY
734 lea 13(%rdi), %rax
735# endif
736# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
737 sub $14, %r8
738 lea 14(%rdi), %rdi
739 jnz L(StrncpyFillTailWithZero)
740# endif
741 ret
742
/* L(Exit15): copy 15 bytes from (%rsi) to (%rdi) with two overlapping
   8-byte moves (offsets 0 and 7).
   stpcpy builds: %rax = %rdi + 14.  strncpy builds (not strcat):
   %r8 -= 15, %rdi += 15, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
743 .p2align 4
744L(Exit15):
745 mov (%rsi), %rcx
746 mov 7(%rsi), %rdx
747 mov %rcx, (%rdi)
748 mov %rdx, 7(%rdi)
749# ifdef USE_AS_STPCPY
750 lea 14(%rdi), %rax
751# endif
752# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
753 sub $15, %r8
754 lea 15(%rdi), %rdi
755 jnz L(StrncpyFillTailWithZero)
756# endif
757 ret
758
/* L(Exit16): copy 16 bytes from (%rsi) to (%rdi) with one unaligned
   SSE move.
   stpcpy builds: %rax = %rdi + 15.  strncpy builds (not strcat):
   %r8 -= 16, %rdi += 16, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
759 .p2align 4
760L(Exit16):
761 movdqu (%rsi), %xmm0
762 movdqu %xmm0, (%rdi)
763# ifdef USE_AS_STPCPY
764 lea 15(%rdi), %rax
765# endif
766# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
767 sub $16, %r8
768 lea 16(%rdi), %rdi
769 jnz L(StrncpyFillTailWithZero)
770# endif
771 ret
772
/* L(Exit17): copy 16 bytes from (%rsi) to (%rdi) and write byte 16 from
   %dh.  NOTE(review): relies on %dh being zero — confirm.
   stpcpy builds: %rax = %rdi + 16.  strncpy builds (not strcat):
   %r8 -= 17, %rdi += 17, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
773 .p2align 4
774L(Exit17):
775 movdqu (%rsi), %xmm0
776 movdqu %xmm0, (%rdi)
777 mov %dh, 16(%rdi)
778# ifdef USE_AS_STPCPY
779 lea 16(%rdi), %rax
780# endif
781# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
782 sub $17, %r8
783 lea 17(%rdi), %rdi
784 jnz L(StrncpyFillTailWithZero)
785# endif
786 ret
787
/* L(Exit18): copy 18 bytes from (%rsi) to (%rdi): 16 bytes at offset 0
   and 2 bytes at offset 16.
   stpcpy builds: %rax = %rdi + 17.  strncpy builds (not strcat):
   %r8 -= 18, %rdi += 18, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
788 .p2align 4
789L(Exit18):
790 movdqu (%rsi), %xmm0
791 mov 16(%rsi), %cx
792 movdqu %xmm0, (%rdi)
793 mov %cx, 16(%rdi)
794# ifdef USE_AS_STPCPY
795 lea 17(%rdi), %rax
796# endif
797# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
798 sub $18, %r8
799 lea 18(%rdi), %rdi
800 jnz L(StrncpyFillTailWithZero)
801# endif
802 ret
803
/* L(Exit19): copy 19 bytes from (%rsi) to (%rdi): 16 bytes at offset 0
   and an overlapping 4 bytes at offset 15.
   stpcpy builds: %rax = %rdi + 18.  strncpy builds (not strcat):
   %r8 -= 19, %rdi += 19, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
804 .p2align 4
805L(Exit19):
806 movdqu (%rsi), %xmm0
807 mov 15(%rsi), %ecx
808 movdqu %xmm0, (%rdi)
809 mov %ecx, 15(%rdi)
810# ifdef USE_AS_STPCPY
811 lea 18(%rdi), %rax
812# endif
813# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
814 sub $19, %r8
815 lea 19(%rdi), %rdi
816 jnz L(StrncpyFillTailWithZero)
817# endif
818 ret
819
/* L(Exit20): copy 20 bytes from (%rsi) to (%rdi): 16 bytes at offset 0
   and 4 bytes at offset 16.
   stpcpy builds: %rax = %rdi + 19.  strncpy builds (not strcat):
   %r8 -= 20, %rdi += 20, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
820 .p2align 4
821L(Exit20):
822 movdqu (%rsi), %xmm0
823 mov 16(%rsi), %ecx
824 movdqu %xmm0, (%rdi)
825 mov %ecx, 16(%rdi)
826# ifdef USE_AS_STPCPY
827 lea 19(%rdi), %rax
828# endif
829# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
830 sub $20, %r8
831 lea 20(%rdi), %rdi
832 jnz L(StrncpyFillTailWithZero)
833# endif
834 ret
835
/* L(Exit21): copy 20 bytes from (%rsi) to (%rdi) (16 + 4) and write
   byte 20 from %dh.  NOTE(review): relies on %dh being zero — confirm.
   stpcpy builds: %rax = %rdi + 20.  strncpy builds (not strcat):
   %r8 -= 21, %rdi += 21, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
836 .p2align 4
837L(Exit21):
838 movdqu (%rsi), %xmm0
839 mov 16(%rsi), %ecx
840 movdqu %xmm0, (%rdi)
841 mov %ecx, 16(%rdi)
842 mov %dh, 20(%rdi)
843# ifdef USE_AS_STPCPY
844 lea 20(%rdi), %rax
845# endif
846# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
847 sub $21, %r8
848 lea 21(%rdi), %rdi
849 jnz L(StrncpyFillTailWithZero)
850# endif
851 ret
852
/* L(Exit22): copy 22 bytes from (%rsi) to (%rdi): 16 bytes at offset 0
   and an overlapping 8 bytes at offset 14.
   stpcpy builds: %rax = %rdi + 21.  strncpy builds (not strcat):
   %r8 -= 22, %rdi += 22, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
853 .p2align 4
854L(Exit22):
855 movdqu (%rsi), %xmm0
856 mov 14(%rsi), %rcx
857 movdqu %xmm0, (%rdi)
858 mov %rcx, 14(%rdi)
859# ifdef USE_AS_STPCPY
860 lea 21(%rdi), %rax
861# endif
862# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
863 sub $22, %r8
864 lea 22(%rdi), %rdi
865 jnz L(StrncpyFillTailWithZero)
866# endif
867 ret
868
/* L(Exit23): copy 23 bytes from (%rsi) to (%rdi): 16 bytes at offset 0
   and an overlapping 8 bytes at offset 15.
   stpcpy builds: %rax = %rdi + 22.  strncpy builds (not strcat):
   %r8 -= 23, %rdi += 23, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
869 .p2align 4
870L(Exit23):
871 movdqu (%rsi), %xmm0
872 mov 15(%rsi), %rcx
873 movdqu %xmm0, (%rdi)
874 mov %rcx, 15(%rdi)
875# ifdef USE_AS_STPCPY
876 lea 22(%rdi), %rax
877# endif
878# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
879 sub $23, %r8
880 lea 23(%rdi), %rdi
881 jnz L(StrncpyFillTailWithZero)
882# endif
883 ret
884
/* L(Exit24): copy 24 bytes from (%rsi) to (%rdi): 16 bytes at offset 0
   and 8 bytes at offset 16.
   stpcpy builds: %rax = %rdi + 23.  strncpy builds (not strcat):
   %r8 -= 24, %rdi += 24, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
885 .p2align 4
886L(Exit24):
887 movdqu (%rsi), %xmm0
888 mov 16(%rsi), %rcx
889 movdqu %xmm0, (%rdi)
890 mov %rcx, 16(%rdi)
891# ifdef USE_AS_STPCPY
892 lea 23(%rdi), %rax
893# endif
894# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
895 sub $24, %r8
896 lea 24(%rdi), %rdi
897 jnz L(StrncpyFillTailWithZero)
898# endif
899 ret
900
/* L(Exit25): copy 24 bytes from (%rsi) to (%rdi) (16 + 8) and write
   byte 24 from %dh.  NOTE(review): relies on %dh being zero — confirm.
   stpcpy builds: %rax = %rdi + 24.  strncpy builds (not strcat):
   %r8 -= 25, %rdi += 25, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
901 .p2align 4
902L(Exit25):
903 movdqu (%rsi), %xmm0
904 mov 16(%rsi), %rcx
905 movdqu %xmm0, (%rdi)
906 mov %rcx, 16(%rdi)
907 mov %dh, 24(%rdi)
908# ifdef USE_AS_STPCPY
909 lea 24(%rdi), %rax
910# endif
911# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
912 sub $25, %r8
913 lea 25(%rdi), %rdi
914 jnz L(StrncpyFillTailWithZero)
915# endif
916 ret
917
/* L(Exit26): copy 26 bytes from (%rsi) to (%rdi): 16 bytes at offset 0,
   8 bytes at offset 16 and 2 bytes at offset 24.
   stpcpy builds: %rax = %rdi + 25.  strncpy builds (not strcat):
   %r8 -= 26, %rdi += 26, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
918 .p2align 4
919L(Exit26):
920 movdqu (%rsi), %xmm0
921 mov 16(%rsi), %rdx
922 mov 24(%rsi), %cx
923 movdqu %xmm0, (%rdi)
924 mov %rdx, 16(%rdi)
925 mov %cx, 24(%rdi)
926# ifdef USE_AS_STPCPY
927 lea 25(%rdi), %rax
928# endif
929# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
930 sub $26, %r8
931 lea 26(%rdi), %rdi
932 jnz L(StrncpyFillTailWithZero)
933# endif
934 ret
935
/* L(Exit27): copy 27 bytes from (%rsi) to (%rdi): 16 bytes at offset 0,
   8 bytes at offset 16 and an overlapping 4 bytes at offset 23.
   stpcpy builds: %rax = %rdi + 26.  strncpy builds (not strcat):
   %r8 -= 27, %rdi += 27, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
936 .p2align 4
937L(Exit27):
938 movdqu (%rsi), %xmm0
939 mov 16(%rsi), %rdx
940 mov 23(%rsi), %ecx
941 movdqu %xmm0, (%rdi)
942 mov %rdx, 16(%rdi)
943 mov %ecx, 23(%rdi)
944# ifdef USE_AS_STPCPY
945 lea 26(%rdi), %rax
946# endif
947# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
948 sub $27, %r8
949 lea 27(%rdi), %rdi
950 jnz L(StrncpyFillTailWithZero)
951# endif
952 ret
953
/* L(Exit28): copy 28 bytes from (%rsi) to (%rdi): 16 bytes at offset 0,
   8 bytes at offset 16 and 4 bytes at offset 24.
   stpcpy builds: %rax = %rdi + 27.  strncpy builds (not strcat):
   %r8 -= 28, %rdi += 28, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
954 .p2align 4
955L(Exit28):
956 movdqu (%rsi), %xmm0
957 mov 16(%rsi), %rdx
958 mov 24(%rsi), %ecx
959 movdqu %xmm0, (%rdi)
960 mov %rdx, 16(%rdi)
961 mov %ecx, 24(%rdi)
962# ifdef USE_AS_STPCPY
963 lea 27(%rdi), %rax
964# endif
965# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
966 sub $28, %r8
967 lea 28(%rdi), %rdi
968 jnz L(StrncpyFillTailWithZero)
969# endif
970 ret
971
/* L(Exit29): copy 29 bytes from (%rsi) to (%rdi) with two overlapping
   16-byte moves (offsets 0 and 13).
   stpcpy builds: %rax = %rdi + 28.  strncpy builds (not strcat):
   %r8 -= 29, %rdi += 29, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
972 .p2align 4
973L(Exit29):
974 movdqu (%rsi), %xmm0
975 movdqu 13(%rsi), %xmm2
976 movdqu %xmm0, (%rdi)
977 movdqu %xmm2, 13(%rdi)
978# ifdef USE_AS_STPCPY
979 lea 28(%rdi), %rax
980# endif
981# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
982 sub $29, %r8
983 lea 29(%rdi), %rdi
984 jnz L(StrncpyFillTailWithZero)
985# endif
986 ret
987
/* L(Exit30): copy 30 bytes from (%rsi) to (%rdi) with two overlapping
   16-byte moves (offsets 0 and 14).
   stpcpy builds: %rax = %rdi + 29.  strncpy builds (not strcat):
   %r8 -= 30, %rdi += 30, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
988 .p2align 4
989L(Exit30):
990 movdqu (%rsi), %xmm0
991 movdqu 14(%rsi), %xmm2
992 movdqu %xmm0, (%rdi)
993 movdqu %xmm2, 14(%rdi)
994# ifdef USE_AS_STPCPY
995 lea 29(%rdi), %rax
996# endif
997# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
998 sub $30, %r8
999 lea 30(%rdi), %rdi
1000 jnz L(StrncpyFillTailWithZero)
1001# endif
1002 ret
1003
/* L(Exit31): copy 31 bytes from (%rsi) to (%rdi) with two overlapping
   16-byte moves (offsets 0 and 15).
   stpcpy builds: %rax = %rdi + 30.  strncpy builds (not strcat):
   %r8 -= 31, %rdi += 31, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
1004 .p2align 4
1005L(Exit31):
1006 movdqu (%rsi), %xmm0
1007 movdqu 15(%rsi), %xmm2
1008 movdqu %xmm0, (%rdi)
1009 movdqu %xmm2, 15(%rdi)
1010# ifdef USE_AS_STPCPY
1011 lea 30(%rdi), %rax
1012# endif
1013# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1014 sub $31, %r8
1015 lea 31(%rdi), %rdi
1016 jnz L(StrncpyFillTailWithZero)
1017# endif
1018 ret
1019
/* L(Exit32): copy 32 bytes from (%rsi) to (%rdi) with two 16-byte moves
   (offsets 0 and 16).
   stpcpy builds: %rax = %rdi + 31.  strncpy builds (not strcat):
   %r8 -= 32, %rdi += 32, then L(StrncpyFillTailWithZero) if %r8 != 0
   (jnz uses the flags from sub; lea leaves them alone).  */
1020 .p2align 4
1021L(Exit32):
1022 movdqu (%rsi), %xmm0
1023 movdqu 16(%rsi), %xmm2
1024 movdqu %xmm0, (%rdi)
1025 movdqu %xmm2, 16(%rdi)
1026# ifdef USE_AS_STPCPY
1027 lea 31(%rdi), %rax
1028# endif
1029# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1030 sub $32, %r8
1031 lea 32(%rdi), %rdi
1032 jnz L(StrncpyFillTailWithZero)
1033# endif
1034 ret
1035
1036# ifdef USE_AS_STRNCPY
1037
/* L(StrncpyExit0): nothing left to copy; return.
   stpcpy builds: %rax = %rdi.  strcat builds: store a zero byte at
   (%rdi) (via %ch, cleared just before).  */
1038 .p2align 4
1039L(StrncpyExit0):
1040# ifdef USE_AS_STPCPY
1041 mov %rdi, %rax
1042# endif
1043# ifdef USE_AS_STRCAT
1044 xor %ch, %ch
1045 movb %ch, (%rdi)
1046# endif
1047 ret
1048
/* L(StrncpyExit1): copy exactly 1 byte from (%rsi) to (%rdi) and return.
   stpcpy builds: %rax = %rdi + 1.  strcat builds: also store a zero byte
   at 1(%rdi) (via %ch, cleared just before).  */
1049 .p2align 4
1050L(StrncpyExit1):
1051 mov (%rsi), %dl
1052 mov %dl, (%rdi)
1053# ifdef USE_AS_STPCPY
1054 lea 1(%rdi), %rax
1055# endif
1056# ifdef USE_AS_STRCAT
1057 xor %ch, %ch
1058 movb %ch, 1(%rdi)
1059# endif
1060 ret
1061
/* L(StrncpyExit2): copy exactly 2 bytes from (%rsi) to (%rdi) and
   return.  stpcpy builds: %rax = %rdi + 2.  strcat builds: also store a
   zero byte at 2(%rdi) (via %ch, cleared just before).  */
1062 .p2align 4
1063L(StrncpyExit2):
1064 mov (%rsi), %dx
1065 mov %dx, (%rdi)
1066# ifdef USE_AS_STPCPY
1067 lea 2(%rdi), %rax
1068# endif
1069# ifdef USE_AS_STRCAT
1070 xor %ch, %ch
1071 movb %ch, 2(%rdi)
1072# endif
1073 ret
1074
/* L(StrncpyExit3): copy exactly 3 bytes from (%rsi) to (%rdi) (2 + 1)
   and return.  stpcpy builds: %rax = %rdi + 3.  strcat builds: also
   store a zero byte at 3(%rdi) (via %ch, cleared just before).  */
1075 .p2align 4
1076L(StrncpyExit3):
1077 mov (%rsi), %cx
1078 mov 2(%rsi), %dl
1079 mov %cx, (%rdi)
1080 mov %dl, 2(%rdi)
1081# ifdef USE_AS_STPCPY
1082 lea 3(%rdi), %rax
1083# endif
1084# ifdef USE_AS_STRCAT
1085 xor %ch, %ch
1086 movb %ch, 3(%rdi)
1087# endif
1088 ret
1089
/* L(StrncpyExit4): copy exactly 4 bytes from (%rsi) to (%rdi) and
   return.  stpcpy builds: %rax = %rdi + 4.  strcat builds: also store a
   zero byte at 4(%rdi) (via %ch, cleared just before).  */
1090 .p2align 4
1091L(StrncpyExit4):
1092 mov (%rsi), %edx
1093 mov %edx, (%rdi)
1094# ifdef USE_AS_STPCPY
1095 lea 4(%rdi), %rax
1096# endif
1097# ifdef USE_AS_STRCAT
1098 xor %ch, %ch
1099 movb %ch, 4(%rdi)
1100# endif
1101 ret
1102
/* L(StrncpyExit5): copy exactly 5 bytes from (%rsi) to (%rdi) (4 + 1)
   and return.  stpcpy builds: %rax = %rdi + 5.  strcat builds: also
   store a zero byte at 5(%rdi) (via %ch, cleared just before).  */
1103 .p2align 4
1104L(StrncpyExit5):
1105 mov (%rsi), %ecx
1106 mov 4(%rsi), %dl
1107 mov %ecx, (%rdi)
1108 mov %dl, 4(%rdi)
1109# ifdef USE_AS_STPCPY
1110 lea 5(%rdi), %rax
1111# endif
1112# ifdef USE_AS_STRCAT
1113 xor %ch, %ch
1114 movb %ch, 5(%rdi)
1115# endif
1116 ret
1117
/* L(StrncpyExit6): copy exactly 6 bytes from (%rsi) to (%rdi) (4 + 2)
   and return.  stpcpy builds: %rax = %rdi + 6.  strcat builds: also
   store a zero byte at 6(%rdi) (via %ch, cleared just before).  */
1118 .p2align 4
1119L(StrncpyExit6):
1120 mov (%rsi), %ecx
1121 mov 4(%rsi), %dx
1122 mov %ecx, (%rdi)
1123 mov %dx, 4(%rdi)
1124# ifdef USE_AS_STPCPY
1125 lea 6(%rdi), %rax
1126# endif
1127# ifdef USE_AS_STRCAT
1128 xor %ch, %ch
1129 movb %ch, 6(%rdi)
1130# endif
1131 ret
1132
/* L(StrncpyExit7): copy exactly 7 bytes from (%rsi) to (%rdi) with two
   overlapping 4-byte moves (offsets 0 and 3) and return.  stpcpy builds:
   %rax = %rdi + 7.  strcat builds: also store a zero byte at 7(%rdi)
   (via %ch, cleared just before).  */
1133 .p2align 4
1134L(StrncpyExit7):
1135 mov (%rsi), %ecx
1136 mov 3(%rsi), %edx
1137 mov %ecx, (%rdi)
1138 mov %edx, 3(%rdi)
1139# ifdef USE_AS_STPCPY
1140 lea 7(%rdi), %rax
1141# endif
1142# ifdef USE_AS_STRCAT
1143 xor %ch, %ch
1144 movb %ch, 7(%rdi)
1145# endif
1146 ret
1147
/* L(StrncpyExit8): copy exactly 8 bytes from (%rsi) to (%rdi) and
   return.  stpcpy builds: %rax = %rdi + 8.  strcat builds: also store a
   zero byte at 8(%rdi) (via %ch, cleared just before).  */
1148 .p2align 4
1149L(StrncpyExit8):
1150 mov (%rsi), %rdx
1151 mov %rdx, (%rdi)
1152# ifdef USE_AS_STPCPY
1153 lea 8(%rdi), %rax
1154# endif
1155# ifdef USE_AS_STRCAT
1156 xor %ch, %ch
1157 movb %ch, 8(%rdi)
1158# endif
1159 ret
1160
/* L(StrncpyExit9): copy exactly 9 bytes from (%rsi) to (%rdi) (8 + 1)
   and return.  stpcpy builds: %rax = %rdi + 9.  strcat builds: also
   store a zero byte at 9(%rdi) (via %ch, cleared just before).  */
1161 .p2align 4
1162L(StrncpyExit9):
1163 mov (%rsi), %rcx
1164 mov 8(%rsi), %dl
1165 mov %rcx, (%rdi)
1166 mov %dl, 8(%rdi)
1167# ifdef USE_AS_STPCPY
1168 lea 9(%rdi), %rax
1169# endif
1170# ifdef USE_AS_STRCAT
1171 xor %ch, %ch
1172 movb %ch, 9(%rdi)
1173# endif
1174 ret
1175
/* L(StrncpyExit10): copy exactly 10 bytes from (%rsi) to (%rdi) (8 + 2)
   and return.  stpcpy builds: %rax = %rdi + 10.  strcat builds: also
   store a zero byte at 10(%rdi) (via %ch, cleared just before).  */
1176 .p2align 4
1177L(StrncpyExit10):
1178 mov (%rsi), %rcx
1179 mov 8(%rsi), %dx
1180 mov %rcx, (%rdi)
1181 mov %dx, 8(%rdi)
1182# ifdef USE_AS_STPCPY
1183 lea 10(%rdi), %rax
1184# endif
1185# ifdef USE_AS_STRCAT
1186 xor %ch, %ch
1187 movb %ch, 10(%rdi)
1188# endif
1189 ret
1190
/* L(StrncpyExit11): copy exactly 11 bytes from (%rsi) to (%rdi): 8 bytes
   at offset 0 and an overlapping 4 bytes at offset 7; then return.
   stpcpy builds: %rax = %rdi + 11.  strcat builds: also store a zero
   byte at 11(%rdi) (via %ch, cleared just before).  */
1191 .p2align 4
1192L(StrncpyExit11):
1193 mov (%rsi), %rcx
1194 mov 7(%rsi), %edx
1195 mov %rcx, (%rdi)
1196 mov %edx, 7(%rdi)
1197# ifdef USE_AS_STPCPY
1198 lea 11(%rdi), %rax
1199# endif
1200# ifdef USE_AS_STRCAT
1201 xor %ch, %ch
1202 movb %ch, 11(%rdi)
1203# endif
1204 ret
1205
/* L(StrncpyExit12): copy exactly 12 bytes from (%rsi) to (%rdi) (8 + 4)
   and return.  stpcpy builds: %rax = %rdi + 12.  strcat builds: also
   store a zero byte at 12(%rdi) (via %ch, cleared just before).  */
1206 .p2align 4
1207L(StrncpyExit12):
1208 mov (%rsi), %rcx
1209 mov 8(%rsi), %edx
1210 mov %rcx, (%rdi)
1211 mov %edx, 8(%rdi)
1212# ifdef USE_AS_STPCPY
1213 lea 12(%rdi), %rax
1214# endif
1215# ifdef USE_AS_STRCAT
1216 xor %ch, %ch
1217 movb %ch, 12(%rdi)
1218# endif
1219 ret
1220
/* L(StrncpyExit13): copy exactly 13 bytes from (%rsi) to (%rdi) with two
   overlapping 8-byte moves (offsets 0 and 5) and return.  stpcpy builds:
   %rax = %rdi + 13.  strcat builds: also store a zero byte at 13(%rdi)
   (via %ch, cleared just before).  */
1221 .p2align 4
1222L(StrncpyExit13):
1223 mov (%rsi), %rcx
1224 mov 5(%rsi), %rdx
1225 mov %rcx, (%rdi)
1226 mov %rdx, 5(%rdi)
1227# ifdef USE_AS_STPCPY
1228 lea 13(%rdi), %rax
1229# endif
1230# ifdef USE_AS_STRCAT
1231 xor %ch, %ch
1232 movb %ch, 13(%rdi)
1233# endif
1234 ret
1235
/* L(StrncpyExit14): copy exactly 14 bytes from (%rsi) to (%rdi) with two
   overlapping 8-byte moves (offsets 0 and 6) and return.  stpcpy builds:
   %rax = %rdi + 14.  strcat builds: also store a zero byte at 14(%rdi)
   (via %ch, cleared just before).  */
1236 .p2align 4
1237L(StrncpyExit14):
1238 mov (%rsi), %rcx
1239 mov 6(%rsi), %rdx
1240 mov %rcx, (%rdi)
1241 mov %rdx, 6(%rdi)
1242# ifdef USE_AS_STPCPY
1243 lea 14(%rdi), %rax
1244# endif
1245# ifdef USE_AS_STRCAT
1246 xor %ch, %ch
1247 movb %ch, 14(%rdi)
1248# endif
1249 ret
1250
/* L(StrncpyExit15): copy exactly 15 bytes from (%rsi) to (%rdi) with two
   overlapping 8-byte moves (offsets 0 and 7) and return.  stpcpy builds:
   %rax = %rdi + 15.  strcat builds: also store a zero byte at 15(%rdi)
   (via %ch, cleared just before).  */
1251 .p2align 4
1252L(StrncpyExit15):
1253 mov (%rsi), %rcx
1254 mov 7(%rsi), %rdx
1255 mov %rcx, (%rdi)
1256 mov %rdx, 7(%rdi)
1257# ifdef USE_AS_STPCPY
1258 lea 15(%rdi), %rax
1259# endif
1260# ifdef USE_AS_STRCAT
1261 xor %ch, %ch
1262 movb %ch, 15(%rdi)
1263# endif
1264 ret
1265
/* L(StrncpyExit16): copy exactly 16 bytes from (%rsi) to (%rdi) with one
   unaligned SSE move and return.  stpcpy builds: %rax = %rdi + 16.
   strcat builds: also store a zero byte at 16(%rdi) (via %ch, cleared
   just before).  */
1266 .p2align 4
1267L(StrncpyExit16):
1268 movdqu (%rsi), %xmm0
1269 movdqu %xmm0, (%rdi)
1270# ifdef USE_AS_STPCPY
1271 lea 16(%rdi), %rax
1272# endif
1273# ifdef USE_AS_STRCAT
1274 xor %ch, %ch
1275 movb %ch, 16(%rdi)
1276# endif
1277 ret
1278
/* L(StrncpyExit17): copy exactly 17 bytes from (%rsi) to (%rdi) (16 + 1)
   and return.  stpcpy builds: %rax = %rdi + 17.  strcat builds: also
   store a zero byte at 17(%rdi) (via %ch, cleared just before).  */
1279 .p2align 4
1280L(StrncpyExit17):
1281 movdqu (%rsi), %xmm0
1282 mov 16(%rsi), %cl
1283 movdqu %xmm0, (%rdi)
1284 mov %cl, 16(%rdi)
1285# ifdef USE_AS_STPCPY
1286 lea 17(%rdi), %rax
1287# endif
1288# ifdef USE_AS_STRCAT
1289 xor %ch, %ch
1290 movb %ch, 17(%rdi)
1291# endif
1292 ret
1293
/* L(StrncpyExit18): copy exactly 18 bytes from (%rsi) to (%rdi) (16 + 2)
   and return.  stpcpy builds: %rax = %rdi + 18.  strcat builds: also
   store a zero byte at 18(%rdi) (via %ch, cleared just before).  */
1294 .p2align 4
1295L(StrncpyExit18):
1296 movdqu (%rsi), %xmm0
1297 mov 16(%rsi), %cx
1298 movdqu %xmm0, (%rdi)
1299 mov %cx, 16(%rdi)
1300# ifdef USE_AS_STPCPY
1301 lea 18(%rdi), %rax
1302# endif
1303# ifdef USE_AS_STRCAT
1304 xor %ch, %ch
1305 movb %ch, 18(%rdi)
1306# endif
1307 ret
1308
/* L(StrncpyExit19): copy exactly 19 bytes from (%rsi) to (%rdi): 16
   bytes at offset 0 and an overlapping 4 bytes at offset 15; then
   return.  stpcpy builds: %rax = %rdi + 19.  strcat builds: also store a
   zero byte at 19(%rdi) (via %ch, cleared just before).  */
1309 .p2align 4
1310L(StrncpyExit19):
1311 movdqu (%rsi), %xmm0
1312 mov 15(%rsi), %ecx
1313 movdqu %xmm0, (%rdi)
1314 mov %ecx, 15(%rdi)
1315# ifdef USE_AS_STPCPY
1316 lea 19(%rdi), %rax
1317# endif
1318# ifdef USE_AS_STRCAT
1319 xor %ch, %ch
1320 movb %ch, 19(%rdi)
1321# endif
1322 ret
1323
/* L(StrncpyExit20): copy exactly 20 bytes from (%rsi) to (%rdi) (16 + 4)
   and return.  stpcpy builds: %rax = %rdi + 20.  strcat builds: also
   store a zero byte at 20(%rdi) (via %ch, cleared just before).  */
1324 .p2align 4
1325L(StrncpyExit20):
1326 movdqu (%rsi), %xmm0
1327 mov 16(%rsi), %ecx
1328 movdqu %xmm0, (%rdi)
1329 mov %ecx, 16(%rdi)
1330# ifdef USE_AS_STPCPY
1331 lea 20(%rdi), %rax
1332# endif
1333# ifdef USE_AS_STRCAT
1334 xor %ch, %ch
1335 movb %ch, 20(%rdi)
1336# endif
1337 ret
1338
/* L(StrncpyExit21): copy exactly 21 bytes from (%rsi) to (%rdi)
   (16 + 4 + 1) and return.  stpcpy builds: %rax = %rdi + 21.  strcat
   builds: also store a zero byte at 21(%rdi) (via %ch, cleared just
   before).  */
1339 .p2align 4
1340L(StrncpyExit21):
1341 movdqu (%rsi), %xmm0
1342 mov 16(%rsi), %ecx
1343 mov 20(%rsi), %dl
1344 movdqu %xmm0, (%rdi)
1345 mov %ecx, 16(%rdi)
1346 mov %dl, 20(%rdi)
1347# ifdef USE_AS_STPCPY
1348 lea 21(%rdi), %rax
1349# endif
1350# ifdef USE_AS_STRCAT
1351 xor %ch, %ch
1352 movb %ch, 21(%rdi)
1353# endif
1354 ret
1355
/* L(StrncpyExit22): copy exactly 22 bytes from (%rsi) to (%rdi): 16
   bytes at offset 0 and an overlapping 8 bytes at offset 14; then
   return.  stpcpy builds: %rax = %rdi + 22.  strcat builds: also store a
   zero byte at 22(%rdi) (via %ch, cleared just before).  */
1356 .p2align 4
1357L(StrncpyExit22):
1358 movdqu (%rsi), %xmm0
1359 mov 14(%rsi), %rcx
1360 movdqu %xmm0, (%rdi)
1361 mov %rcx, 14(%rdi)
1362# ifdef USE_AS_STPCPY
1363 lea 22(%rdi), %rax
1364# endif
1365# ifdef USE_AS_STRCAT
1366 xor %ch, %ch
1367 movb %ch, 22(%rdi)
1368# endif
1369 ret
1370
/* L(StrncpyExit23): copy exactly 23 bytes from (%rsi) to (%rdi): 16
   bytes at offset 0 and an overlapping 8 bytes at offset 15; then
   return.  stpcpy builds: %rax = %rdi + 23.  strcat builds: also store a
   zero byte at 23(%rdi) (via %ch, cleared just before).  */
1371 .p2align 4
1372L(StrncpyExit23):
1373 movdqu (%rsi), %xmm0
1374 mov 15(%rsi), %rcx
1375 movdqu %xmm0, (%rdi)
1376 mov %rcx, 15(%rdi)
1377# ifdef USE_AS_STPCPY
1378 lea 23(%rdi), %rax
1379# endif
1380# ifdef USE_AS_STRCAT
1381 xor %ch, %ch
1382 movb %ch, 23(%rdi)
1383# endif
1384 ret
1385
/* L(StrncpyExit24): copy exactly 24 bytes from (%rsi) to (%rdi) (16 + 8)
   and return.  stpcpy builds: %rax = %rdi + 24.  strcat builds: also
   store a zero byte at 24(%rdi) (via %ch, cleared just before).  */
1386 .p2align 4
1387L(StrncpyExit24):
1388 movdqu (%rsi), %xmm0
1389 mov 16(%rsi), %rcx
1390 movdqu %xmm0, (%rdi)
1391 mov %rcx, 16(%rdi)
1392# ifdef USE_AS_STPCPY
1393 lea 24(%rdi), %rax
1394# endif
1395# ifdef USE_AS_STRCAT
1396 xor %ch, %ch
1397 movb %ch, 24(%rdi)
1398# endif
1399 ret
1400
/* L(StrncpyExit25): copy exactly 25 bytes from (%rsi) to (%rdi)
   (16 + 8 + 1) and return.  stpcpy builds: %rax = %rdi + 25.  strcat
   builds: also store a zero byte at 25(%rdi) (via %ch, cleared just
   before).  */
1401 .p2align 4
1402L(StrncpyExit25):
1403 movdqu (%rsi), %xmm0
1404 mov 16(%rsi), %rdx
1405 mov 24(%rsi), %cl
1406 movdqu %xmm0, (%rdi)
1407 mov %rdx, 16(%rdi)
1408 mov %cl, 24(%rdi)
1409# ifdef USE_AS_STPCPY
1410 lea 25(%rdi), %rax
1411# endif
1412# ifdef USE_AS_STRCAT
1413 xor %ch, %ch
1414 movb %ch, 25(%rdi)
1415# endif
1416 ret
1417
/* L(StrncpyExit26): copy exactly 26 bytes from (%rsi) to (%rdi)
   (16 + 8 + 2) and return.  stpcpy builds: %rax = %rdi + 26.  strcat
   builds: also store a zero byte at 26(%rdi) (via %ch, cleared just
   before).  */
1418 .p2align 4
1419L(StrncpyExit26):
1420 movdqu (%rsi), %xmm0
1421 mov 16(%rsi), %rdx
1422 mov 24(%rsi), %cx
1423 movdqu %xmm0, (%rdi)
1424 mov %rdx, 16(%rdi)
1425 mov %cx, 24(%rdi)
1426# ifdef USE_AS_STPCPY
1427 lea 26(%rdi), %rax
1428# endif
1429# ifdef USE_AS_STRCAT
1430 xor %ch, %ch
1431 movb %ch, 26(%rdi)
1432# endif
1433 ret
1434
/* L(StrncpyExit27): copy exactly 27 bytes from (%rsi) to (%rdi): 16
   bytes at offset 0, 8 bytes at offset 16 and an overlapping 4 bytes at
   offset 23; then return.  stpcpy builds: %rax = %rdi + 27.  strcat
   builds: also store a zero byte at 27(%rdi) (via %ch, cleared just
   before).  */
1435 .p2align 4
1436L(StrncpyExit27):
1437 movdqu (%rsi), %xmm0
1438 mov 16(%rsi), %rdx
1439 mov 23(%rsi), %ecx
1440 movdqu %xmm0, (%rdi)
1441 mov %rdx, 16(%rdi)
1442 mov %ecx, 23(%rdi)
1443# ifdef USE_AS_STPCPY
1444 lea 27(%rdi), %rax
1445# endif
1446# ifdef USE_AS_STRCAT
1447 xor %ch, %ch
1448 movb %ch, 27(%rdi)
1449# endif
1450 ret
1451
/* L(StrncpyExit28): copy exactly 28 bytes from (%rsi) to (%rdi)
   (16 + 8 + 4) and return.  stpcpy builds: %rax = %rdi + 28.  strcat
   builds: also store a zero byte at 28(%rdi) (via %ch, cleared just
   before).  */
1452 .p2align 4
1453L(StrncpyExit28):
1454 movdqu (%rsi), %xmm0
1455 mov 16(%rsi), %rdx
1456 mov 24(%rsi), %ecx
1457 movdqu %xmm0, (%rdi)
1458 mov %rdx, 16(%rdi)
1459 mov %ecx, 24(%rdi)
1460# ifdef USE_AS_STPCPY
1461 lea 28(%rdi), %rax
1462# endif
1463# ifdef USE_AS_STRCAT
1464 xor %ch, %ch
1465 movb %ch, 28(%rdi)
1466# endif
1467 ret
1468
/* L(StrncpyExit29): copy exactly 29 bytes from (%rsi) to (%rdi) with two
   overlapping 16-byte moves (offsets 0 and 13) and return.  stpcpy
   builds: %rax = %rdi + 29.  strcat builds: also store a zero byte at
   29(%rdi) (via %ch, cleared just before).  */
1469 .p2align 4
1470L(StrncpyExit29):
1471 movdqu (%rsi), %xmm0
1472 movdqu 13(%rsi), %xmm2
1473 movdqu %xmm0, (%rdi)
1474 movdqu %xmm2, 13(%rdi)
1475# ifdef USE_AS_STPCPY
1476 lea 29(%rdi), %rax
1477# endif
1478# ifdef USE_AS_STRCAT
1479 xor %ch, %ch
1480 movb %ch, 29(%rdi)
1481# endif
1482 ret
1483
/* Entry 30 of L(ExitStrncpyTable): copy exactly 30 bytes from (%rsi) to
   (%rdi) with two unaligned 16-byte SSE moves.  The second starts at
   offset 14, so the two moves overlap on bytes 14..15.  Both loads are
   issued before either store.  */
	.p2align 4
L(StrncpyExit30):
	movdqu	(%rsi), %xmm0		/* bytes 0..15 */
	movdqu	14(%rsi), %xmm2		/* bytes 14..29 */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
# ifdef USE_AS_STPCPY
	leaq	30(%rdi), %rax		/* stpcpy flavour: return dst + 30 */
# endif
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch		/* %ch = 0 */
	movb	%ch, 30(%rdi)		/* strcat flavour: NUL at dst[30] */
# endif
	ret
1498
/* Entry 31 of L(ExitStrncpyTable): copy exactly 31 bytes from (%rsi) to
   (%rdi) with two unaligned 16-byte SSE moves.  The second starts at
   offset 15, so the two moves overlap on byte 15.  Both loads are issued
   before either store.  */
	.p2align 4
L(StrncpyExit31):
	movdqu	(%rsi), %xmm0		/* bytes 0..15 */
	movdqu	15(%rsi), %xmm2		/* bytes 15..30 */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
# ifdef USE_AS_STPCPY
	leaq	31(%rdi), %rax		/* stpcpy flavour: return dst + 31 */
# endif
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch		/* %ch = 0 */
	movb	%ch, 31(%rdi)		/* strcat flavour: NUL at dst[31] */
# endif
	ret
1513
/* Entry 32 of L(ExitStrncpyTable): copy exactly 32 bytes from (%rsi) to
   (%rdi) with two back-to-back unaligned 16-byte SSE moves.  Both loads
   are issued before either store.  */
	.p2align 4
L(StrncpyExit32):
	movdqu	(%rsi), %xmm0		/* bytes 0..15 */
	movdqu	16(%rsi), %xmm2		/* bytes 16..31 */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
# ifdef USE_AS_STPCPY
	leaq	32(%rdi), %rax		/* stpcpy flavour: return dst + 32 */
# endif
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch		/* %ch = 0 */
	movb	%ch, 32(%rdi)		/* strcat flavour: NUL at dst[32] */
# endif
	ret
1528
/* Entry 33 of L(ExitStrncpyTable): copy exactly 33 bytes from (%rsi) to
   (%rdi) as 16 + 16 + 1 bytes.  All loads are issued before any store.
   NOTE(review): unlike the smaller exits there is no USE_AS_STPCPY
   return-value fixup here; presumably this entry is never dispatched to
   in stpcpy-flavoured builds -- confirm against the dispatch sites.  */
	.p2align 4
L(StrncpyExit33):
	movdqu	(%rsi), %xmm0		/* bytes 0..15 */
	movdqu	16(%rsi), %xmm2		/* bytes 16..31 */
	movb	32(%rsi), %cl		/* byte 32 */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	movb	%cl, 32(%rdi)
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch		/* %ch = 0 */
	movb	%ch, 33(%rdi)		/* strcat flavour: NUL at dst[33] */
# endif
	ret
1542
1543# ifndef USE_AS_STRCAT
1544
/* Tail-fill routines, one per entry of L(FillTable).  L(FillN) stores N
   bytes taken from %rdx (N <= 14) or from %xmm0 (N = 15, 16) and returns.
   The dispatch code in L(StrncpyFillTailWithZero) clears both registers
   before jumping here, so on that path the stored bytes are zeros.

   Fill3, Fill7 and Fill15 use a single store that is one byte wider than
   N and begins at -1(%rdi); presumably the byte just before %rdi is
   already zero at every dispatch site (TODO confirm).  Fill11, Fill13 and
   Fill14 use two overlapping stores.  */
	.p2align 4
L(Fill0):
	ret				/* nothing to store */

	.p2align 4
L(Fill1):
	movb	%dl, (%rdi)
	ret

	.p2align 4
L(Fill2):
	movw	%dx, (%rdi)
	ret

	.p2align 4
L(Fill3):
	movl	%edx, -1(%rdi)		/* bytes -1..2 */
	ret

	.p2align 4
L(Fill4):
	movl	%edx, (%rdi)
	ret

	.p2align 4
L(Fill5):
	movl	%edx, (%rdi)		/* bytes 0..3 */
	movb	%dl, 4(%rdi)		/* byte 4 */
	ret

	.p2align 4
L(Fill6):
	movl	%edx, (%rdi)		/* bytes 0..3 */
	movw	%dx, 4(%rdi)		/* bytes 4..5 */
	ret

	.p2align 4
L(Fill7):
	movq	%rdx, -1(%rdi)		/* bytes -1..6 */
	ret

	.p2align 4
L(Fill8):
	movq	%rdx, (%rdi)
	ret

	.p2align 4
L(Fill9):
	movq	%rdx, (%rdi)		/* bytes 0..7 */
	movb	%dl, 8(%rdi)		/* byte 8 */
	ret

	.p2align 4
L(Fill10):
	movq	%rdx, (%rdi)		/* bytes 0..7 */
	movw	%dx, 8(%rdi)		/* bytes 8..9 */
	ret

	.p2align 4
L(Fill11):
	movq	%rdx, (%rdi)		/* bytes 0..7 */
	movl	%edx, 7(%rdi)		/* bytes 7..10 */
	ret

	.p2align 4
L(Fill12):
	movq	%rdx, (%rdi)		/* bytes 0..7 */
	movl	%edx, 8(%rdi)		/* bytes 8..11 */
	ret

	.p2align 4
L(Fill13):
	movq	%rdx, (%rdi)		/* bytes 0..7 */
	movq	%rdx, 5(%rdi)		/* bytes 5..12 */
	ret

	.p2align 4
L(Fill14):
	movq	%rdx, (%rdi)		/* bytes 0..7 */
	movq	%rdx, 6(%rdi)		/* bytes 6..13 */
	ret

	.p2align 4
L(Fill15):
	movdqu	%xmm0, -1(%rdi)		/* bytes -1..14 */
	ret

	.p2align 4
L(Fill16):
	movdqu	%xmm0, (%rdi)		/* bytes 0..15 */
	ret
1636
/* Store the 16 bytes held in %xmm2 at %rdi + %rcx, then fall through
   into L(CopyFrom1To16BytesXmmExit).  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm2):
	movdqu	%xmm2, (%rdi, %rcx)

/* Prepare the registers for the zero-fill code that follows.
   In:	%rdx = bit mask (presumably the NUL-byte mask of the chunk just
	       stored -- TODO confirm); bsf needs it to be non-zero
	%rcx = offset of that chunk from %rdi
	%r8  = length counter maintained by the caller
   Out:	%rdx = index of the lowest set bit of the mask
	%rdi = old %rdi + %rcx + %rdx + 1
	%r8  = old %r8 + 15 - %rdx
	%rax = old %rdi + %rcx + %rdx		(USE_AS_STPCPY only)
   Execution then falls through into L(StrncpyFillTailWithZero).  */
	.p2align 4
L(CopyFrom1To16BytesXmmExit):
	bsfq	%rdx, %rdx
	addq	$15, %r8
	addq	%rcx, %rdi
# ifdef USE_AS_STPCPY
	leaq	(%rdi, %rdx), %rax
# endif
	subq	%rdx, %r8
	leaq	1(%rdi, %rdx), %rdi
1651
/* Zero-fill %r8 bytes starting at %rdi and return.
   In:	%rdi = first byte to clear, %r8 = number of bytes to clear.
   Clobbers %rdx, %rsi, %rdi, %r8, %xmm0, flags, plus %rcx and %r11
   through BRANCH_TO_JMPTBL_ENTRY.

   Up to 16 bytes go straight to L(FillTable).  Otherwise one unaligned
   16-byte store is made, %rdi is rounded down to a 16-byte boundary (the
   count is increased by the bytes covered twice), and aligned stores of
   64, 32 and 16 bytes clear the bulk; the last 0..16 bytes are written by
   the L(FillN) routine selected by %r8.  */
	.p2align 4
L(StrncpyFillTailWithZero):
	pxor	%xmm0, %xmm0
	/* 32-bit xor zero-extends into %rdx and has a shorter encoding
	   than the 64-bit form.  */
	xor	%edx, %edx
	sub	$16, %r8
	jbe	L(StrncpyFillExit)	/* at most 16 bytes in total */

	movdqu	%xmm0, (%rdi)		/* first 16 bytes, unaligned */
	add	$16, %rdi

	mov	%rdi, %rsi
	and	$0xf, %rsi		/* %rsi = misalignment of %rdi */
	sub	%rsi, %rdi		/* round %rdi down to 16 bytes */
	add	%rsi, %r8		/* count the re-covered bytes again */
	sub	$64, %r8
	jb	L(StrncpyFillLess64)

L(StrncpyFillLoopMovdqa):
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 16(%rdi)
	movdqa	%xmm0, 32(%rdi)
	movdqa	%xmm0, 48(%rdi)
	add	$64, %rdi
	sub	$64, %r8
	jae	L(StrncpyFillLoopMovdqa)

	/* Here %r8 = remaining - 64 with remaining in 0..63, i.e. a small
	   negative number, so the signed jl tests below are intended.  */
L(StrncpyFillLess64):
	add	$32, %r8		/* %r8 = remaining - 32 */
	jl	L(StrncpyFillLess32)
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 16(%rdi)
	add	$32, %rdi
	sub	$16, %r8
	jl	L(StrncpyFillExit)	/* fewer than 16 bytes left */
	movdqa	%xmm0, (%rdi)
	add	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

L(StrncpyFillLess32):
	add	$16, %r8		/* %r8 = remaining - 16 */
	jl	L(StrncpyFillExit)	/* fewer than 16 bytes left */
	movdqa	%xmm0, (%rdi)
	add	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

L(StrncpyFillExit):
	add	$16, %r8		/* undo the bias: %r8 = bytes left */
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1700
1701/* end of ifndef USE_AS_STRCAT */
1702# endif
1703
/* Two-way dispatch on %rdx: non-zero goes to L(Unaligned64LeaveCase2),
   zero falls through into L(Unaligned64LeaveCase3).  Presumably %rdx is
   the caller's "NUL seen in this 64-byte block" mask -- TODO confirm.  */
	.p2align 4
L(UnalignedLeaveCase2OrCase3):
	testq	%rdx, %rdx
	jnz	L(Unaligned64LeaveCase2)

/* Store %xmm4..%xmm7 to (%rdi), 16(%rdi), 32(%rdi) and 48(%rdi) in turn,
   leaving for L(CopyFrom1To16BytesCase3) as soon as the biased counter in
   %r8 runs out.  %rcx is set to (%r8 + 64) & -16 beforehand for use by
   that target.  If all four stores are made, the stpcpy flavour returns
   dst + 64 and the strcat flavour writes a NUL at dst[64].  */
L(Unaligned64LeaveCase3):
	leaq	64(%r8), %rcx
	andq	$-16, %rcx
	addq	$48, %r8
	jl	L(CopyFrom1To16BytesCase3)	/* signed: %r8 went negative */
	movdqu	%xmm4, (%rdi)
	subq	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm5, 16(%rdi)
	subq	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm6, 32(%rdi)
	subq	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm7, 48(%rdi)
# ifdef USE_AS_STPCPY
	leaq	64(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch		/* %ch = 0 */
	movb	%ch, 64(%rdi)
# endif
	ret
1731
/* Walk the four 16-byte chunks held in %xmm4..%xmm7, tracking the chunk
   offset in %rcx and the biased length counter in %r8.  For each chunk a
   byte-wise compare against %xmm0 yields a mask in %rdx:
     - counter exhausted -> L(CopyFrom1To16BytesCase2OrCase3)
     - mask non-zero     -> the matching chunk exit
     - otherwise the chunk is stored and the walk continues.
   %xmm0 is assumed to be all-zero on entry (TODO confirm against the
   caller).  Whenever execution continues past a jnz the compare result
   was all-zero, so %xmm0 is zero again for the next pcmpeqb.  */
	.p2align 4
L(Unaligned64LeaveCase2):
	/* 32-bit xor zero-extends into %rcx and has a shorter encoding
	   than the 64-bit form.  */
	xor	%ecx, %ecx
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rdx
	add	$48, %r8
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rdx, %rdx
# ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif
	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm4, (%rdi)
	add	$16, %rcx
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rdx, %rdx
# ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm5)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm5, 16(%rdi)
	add	$16, %rcx
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rdx, %rdx
# ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm6)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	/* Last chunk: advance both pointers to it (%rcx is 32 here), then
	   pick the exit from the index of the lowest set mask bit versus
	   the remaining count in %r8.  bsf needs a non-zero mask;
	   presumably guaranteed because this path is only entered when
	   the caller saw a match somewhere in the block (TODO confirm).  */
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm6, 32(%rdi)
	lea	16(%rdi, %rcx), %rdi
	lea	16(%rsi, %rcx), %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
1780
/* Early exit taken from the function entry when the length argument is
   zero: nothing is copied.  Outside the strcat flavour the destination
   pointer in %rdi is returned in %rax.  */
	.p2align 4
L(ExitZero):
# ifndef USE_AS_STRCAT
	movq	%rdi, %rax
# endif
	ret
1787
1788# endif
1789
# ifndef USE_AS_STRCAT
END (STRCPY)
# else
END (STRCAT)
# endif
/* Jump tables used by BRANCH_TO_JMPTBL_ENTRY.  Each entry is a 32-bit
   offset of the target label from the start of its own table (see
   JMPTBL), so the tables need no relocations of their own.

   The alignment directive comes after the section switch so that it
   aligns the table in .rodata; placed before `.section' it would only
   pad the end of the code section.  */
	.section .rodata
	.p2align 4
/* Index i selects L(Exit<i+1>).  */
L(ExitTable):
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
# ifdef USE_AS_STRNCPY
/* Index i selects L(StrncpyExit<i>), for i = 0..33.  */
L(ExitStrncpyTable):
	.int	JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
# ifndef USE_AS_STRCAT
	.p2align 4
/* Index i selects L(Fill<i>), for i = 0..16.  */
L(FillTable):
	.int	JMPTBL(L(Fill0), L(FillTable))
	.int	JMPTBL(L(Fill1), L(FillTable))
	.int	JMPTBL(L(Fill2), L(FillTable))
	.int	JMPTBL(L(Fill3), L(FillTable))
	.int	JMPTBL(L(Fill4), L(FillTable))
	.int	JMPTBL(L(Fill5), L(FillTable))
	.int	JMPTBL(L(Fill6), L(FillTable))
	.int	JMPTBL(L(Fill7), L(FillTable))
	.int	JMPTBL(L(Fill8), L(FillTable))
	.int	JMPTBL(L(Fill9), L(FillTable))
	.int	JMPTBL(L(Fill10), L(FillTable))
	.int	JMPTBL(L(Fill11), L(FillTable))
	.int	JMPTBL(L(Fill12), L(FillTable))
	.int	JMPTBL(L(Fill13), L(FillTable))
	.int	JMPTBL(L(Fill14), L(FillTable))
	.int	JMPTBL(L(Fill15), L(FillTable))
	.int	JMPTBL(L(Fill16), L(FillTable))
# endif
# endif
1888#endif
1889

/* Source: glibc/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S */