1/* strcat with SSSE3
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#if IS_IN (libc)
20
21# include <sysdep.h>
22
23# ifndef STRCAT
24# define STRCAT __strcat_ssse3
25# endif
26
27# define USE_AS_STRCAT
28
29.text
/* Entry point.  The code from here down to L(StartStrcpyPart) finds the
   terminating NUL of the string at %rdi and leaves its byte offset from
   %rdi in %rax; L(StartStrcpyPart) then copies from %rsi to that spot.
   Every return path visible in this file returns %rdi in %rax.  */
30ENTRY (STRCAT)
31# ifdef USE_AS_STRNCAT
/* Keep the incoming %rdx in %r8: %edx is overwritten by the NUL scan
   below.  NOTE(review): presumably %rdx is the strncat length limit;
   USE_AS_STRNCAT is not defined in this file, confirm against the file
   that includes it.  */
32 mov %rdx, %r8
33# endif
34
35
36/* Inline corresponding strlen file, temporary until new strcpy
37 implementation gets merged. */
38
/* %eax = 0: running offset of the NUL.  The first 16 bytes are tested
   one at a time; L(exit_tailN) adds N to the offset.  */
39 xor %eax, %eax
40 cmpb $0, (%rdi)
41 jz L(exit_tail0)
42 cmpb $0, 1(%rdi)
43 jz L(exit_tail1)
44 cmpb $0, 2(%rdi)
45 jz L(exit_tail2)
46 cmpb $0, 3(%rdi)
47 jz L(exit_tail3)
48
49 cmpb $0, 4(%rdi)
50 jz L(exit_tail4)
51 cmpb $0, 5(%rdi)
52 jz L(exit_tail5)
53 cmpb $0, 6(%rdi)
54 jz L(exit_tail6)
55 cmpb $0, 7(%rdi)
56 jz L(exit_tail7)
57
58 cmpb $0, 8(%rdi)
59 jz L(exit_tail8)
60 cmpb $0, 9(%rdi)
61 jz L(exit_tail9)
62 cmpb $0, 10(%rdi)
63 jz L(exit_tail10)
64 cmpb $0, 11(%rdi)
65 jz L(exit_tail11)
66
67 cmpb $0, 12(%rdi)
68 jz L(exit_tail12)
69 cmpb $0, 13(%rdi)
70 jz L(exit_tail13)
71 cmpb $0, 14(%rdi)
72 jz L(exit_tail14)
73 cmpb $0, 15(%rdi)
74 jz L(exit_tail15)
/* No NUL in bytes 0..15.  %rcx = %rdi + 16 is the reference that
   L(exit) subtracts to turn the scan pointer back into an offset.
   %rax = %rdi + 16 rounded down to a 16-byte boundary, so every
   pcmpeqb memory operand below is 16-byte aligned; the first block may
   re-read bytes already tested above, which are known to be non-zero.  */
75 pxor %xmm0, %xmm0
76 lea 16(%rdi), %rcx
77 lea 16(%rdi), %rax
78 and $-16, %rax
79
/* Sixteen unrolled 16-byte steps.  Each step compares a block against
   an all-zero register (0xff in every lane whose byte is 0), gathers
   the lane sign bits into %edx, advances %rax past the block and jumps
   to L(exit) if any bit is set.  The lea sits between test and jnz
   because it does not modify the flags.  The pxor instructions
   interleaved in the first three steps clear the register used by the
   following step.  */
80 pcmpeqb (%rax), %xmm0
81 pmovmskb %xmm0, %edx
82 pxor %xmm1, %xmm1
83 test %edx, %edx
84 lea 16(%rax), %rax
85 jnz L(exit)
86
87 pcmpeqb (%rax), %xmm1
88 pmovmskb %xmm1, %edx
89 pxor %xmm2, %xmm2
90 test %edx, %edx
91 lea 16(%rax), %rax
92 jnz L(exit)
93
94 pcmpeqb (%rax), %xmm2
95 pmovmskb %xmm2, %edx
96 pxor %xmm3, %xmm3
97 test %edx, %edx
98 lea 16(%rax), %rax
99 jnz L(exit)
100
101 pcmpeqb (%rax), %xmm3
102 pmovmskb %xmm3, %edx
103 test %edx, %edx
104 lea 16(%rax), %rax
105 jnz L(exit)
106
/* From here on %xmm0..%xmm3 are reused without being cleared again: a
   step that fell through produced an all-zero compare result, so its
   register still holds zero.  */
107 pcmpeqb (%rax), %xmm0
108 pmovmskb %xmm0, %edx
109 test %edx, %edx
110 lea 16(%rax), %rax
111 jnz L(exit)
112
113 pcmpeqb (%rax), %xmm1
114 pmovmskb %xmm1, %edx
115 test %edx, %edx
116 lea 16(%rax), %rax
117 jnz L(exit)
118
119 pcmpeqb (%rax), %xmm2
120 pmovmskb %xmm2, %edx
121 test %edx, %edx
122 lea 16(%rax), %rax
123 jnz L(exit)
124
125 pcmpeqb (%rax), %xmm3
126 pmovmskb %xmm3, %edx
127 test %edx, %edx
128 lea 16(%rax), %rax
129 jnz L(exit)
130
131 pcmpeqb (%rax), %xmm0
132 pmovmskb %xmm0, %edx
133 test %edx, %edx
134 lea 16(%rax), %rax
135 jnz L(exit)
136
137 pcmpeqb (%rax), %xmm1
138 pmovmskb %xmm1, %edx
139 test %edx, %edx
140 lea 16(%rax), %rax
141 jnz L(exit)
142
143 pcmpeqb (%rax), %xmm2
144 pmovmskb %xmm2, %edx
145 test %edx, %edx
146 lea 16(%rax), %rax
147 jnz L(exit)
148
149 pcmpeqb (%rax), %xmm3
150 pmovmskb %xmm3, %edx
151 test %edx, %edx
152 lea 16(%rax), %rax
153 jnz L(exit)
154
155 pcmpeqb (%rax), %xmm0
156 pmovmskb %xmm0, %edx
157 test %edx, %edx
158 lea 16(%rax), %rax
159 jnz L(exit)
160
161 pcmpeqb (%rax), %xmm1
162 pmovmskb %xmm1, %edx
163 test %edx, %edx
164 lea 16(%rax), %rax
165 jnz L(exit)
166
167 pcmpeqb (%rax), %xmm2
168 pmovmskb %xmm2, %edx
169 test %edx, %edx
170 lea 16(%rax), %rax
171 jnz L(exit)
172
173 pcmpeqb (%rax), %xmm3
174 pmovmskb %xmm3, %edx
175 test %edx, %edx
176 lea 16(%rax), %rax
177 jnz L(exit)
178
/* Round %rax down to a 64-byte boundary for the loop that follows.
   This can step back over up to three blocks that were already scanned
   above and are known to contain no NUL.  */
179 and $-0x40, %rax
180
/* Main scan loop: 64 bytes per iteration.
   On entry %rax is 64-byte aligned (see the and $-0x40 just before the
   loop) and %xmm0..%xmm3 are zero: every earlier compare that fell
   through left an all-zero result, and the same holds for an iteration
   that loops back.
   The four byte masks go to %edx, %r11d, %r10d and %r9d and are OR-ed
   into %r9d.  The lea does not modify the flags, so jz still tests the
   result of the last or: loop while none of the 64 bytes is zero.  */
181 .p2align 4
182L(aligned_64):
183 pcmpeqb (%rax), %xmm0
184 pcmpeqb 16(%rax), %xmm1
185 pcmpeqb 32(%rax), %xmm2
186 pcmpeqb 48(%rax), %xmm3
187 pmovmskb %xmm0, %edx
188 pmovmskb %xmm1, %r11d
189 pmovmskb %xmm2, %r10d
190 pmovmskb %xmm3, %r9d
191 or %edx, %r9d
192 or %r11d, %r9d
193 or %r10d, %r9d
194 lea 64(%rax), %rax
195 jz L(aligned_64)
196
/* A NUL is somewhere in the 64 bytes just scanned; %rax already points
   past them.  Pick the first block with a non-zero mask.  Each exit
   leaves %rax 16 bytes past the start of that block and its mask in
   %edx, the same state the unrolled steps hand to L(exit).  */
197 test %edx, %edx
198 jnz L(aligned_64_exit_16)
199 test %r11d, %r11d
200 jnz L(aligned_64_exit_32)
201 test %r10d, %r10d
202 jnz L(aligned_64_exit_48)
203
/* Fourth block: %rax needs no adjustment.  %r9d was overwritten by the
   or instructions above, so the mask is extracted from %xmm3 again.  */
204L(aligned_64_exit_64):
205 pmovmskb %xmm3, %edx
206 jmp L(exit)
207
/* Third block.  */
208L(aligned_64_exit_48):
209 lea -16(%rax), %rax
210 mov %r10d, %edx
211 jmp L(exit)
212
/* Second block.  */
213L(aligned_64_exit_32):
214 lea -32(%rax), %rax
215 mov %r11d, %edx
216 jmp L(exit)
217
/* First block: %edx already holds its mask; execution falls through
   into L(exit), which follows immediately.  */
218L(aligned_64_exit_16):
219 lea -48(%rax), %rax
220
/* Turn the scan state into the byte offset of the destination's NUL.
   In:  %rax = address 16 bytes past the start of the block that holds
               the NUL,
        %rcx = %rdi + 16,
        %edx = pmovmskb mask of that block (bit i set <=> byte i is 0);
               non-zero on every path that jumps here.
   Out: %rax = offset of the NUL from %rdi; control then reaches
        L(StartStrcpyPart).
   The offset is accumulated in the full 64-bit %rax.  An add to %eax
   would zero bits 32..63 of the difference computed by the sub, and
   L(StartStrcpyPart) forms the copy address from the whole of %rax, so
   a destination of 4 GiB or more would be appended at the wrong place.  */
221L(exit):
/* %rax = start of the matching block - %rdi.  */
222 sub %rcx, %rax
/* %dl == 0: the NUL is in the upper eight bytes of the block.  */
223 test %dl, %dl
224 jz L(exit_high)
225 test $0x01, %dl
226 jnz L(exit_tail0)
227
228 test $0x02, %dl
229 jnz L(exit_tail1)
230
231 test $0x04, %dl
232 jnz L(exit_tail2)
233
234 test $0x08, %dl
235 jnz L(exit_tail3)
236
237 test $0x10, %dl
238 jnz L(exit_tail4)
239
240 test $0x20, %dl
241 jnz L(exit_tail5)
242
243 test $0x40, %dl
244 jnz L(exit_tail6)
/* %dl is non-zero and bits 0..6 are clear, so bit 7 is set.  */
245 add $7, %rax
/* The offset in %rax is already final.  */
246L(exit_tail0):
247 jmp L(StartStrcpyPart)
248
/* NUL in bytes 8..15 of the block: account for the low eight bytes and
   repeat the bit tests on %dh.  */
249 .p2align 4
250L(exit_high):
251 add $8, %rax
252 test $0x01, %dh
253 jnz L(exit_tail0)
254
255 test $0x02, %dh
256 jnz L(exit_tail1)
257
258 test $0x04, %dh
259 jnz L(exit_tail2)
260
261 test $0x08, %dh
262 jnz L(exit_tail3)
263
264 test $0x10, %dh
265 jnz L(exit_tail4)
266
267 test $0x20, %dh
268 jnz L(exit_tail5)
269
270 test $0x40, %dh
271 jnz L(exit_tail6)
/* Only bit 7 of %dh can be set here: byte 15 of the block.  */
272 add $7, %rax
273 jmp L(StartStrcpyPart)
274
/* L(exit_tailN): add N to the offset and continue with the copy.  These
   are reached both from the byte-wise checks at the entry (with %rax
   equal to 0) and from the bit tests above.  */
275 .p2align 4
276L(exit_tail1):
277 add $1, %rax
278 jmp L(StartStrcpyPart)
279
280 .p2align 4
281L(exit_tail2):
282 add $2, %rax
283 jmp L(StartStrcpyPart)
284
285 .p2align 4
286L(exit_tail3):
287 add $3, %rax
288 jmp L(StartStrcpyPart)
289
290 .p2align 4
291L(exit_tail4):
292 add $4, %rax
293 jmp L(StartStrcpyPart)
294
295 .p2align 4
296L(exit_tail5):
297 add $5, %rax
298 jmp L(StartStrcpyPart)
299
300 .p2align 4
301L(exit_tail6):
302 add $6, %rax
303 jmp L(StartStrcpyPart)
304
305 .p2align 4
306L(exit_tail7):
307 add $7, %rax
308 jmp L(StartStrcpyPart)
309
310 .p2align 4
311L(exit_tail8):
312 add $8, %rax
313 jmp L(StartStrcpyPart)
314
315 .p2align 4
316L(exit_tail9):
317 add $9, %rax
318 jmp L(StartStrcpyPart)
319
320 .p2align 4
321L(exit_tail10):
322 add $10, %rax
323 jmp L(StartStrcpyPart)
324
325 .p2align 4
326L(exit_tail11):
327 add $11, %rax
328 jmp L(StartStrcpyPart)
329
330 .p2align 4
331L(exit_tail12):
332 add $12, %rax
333 jmp L(StartStrcpyPart)
334
335 .p2align 4
336L(exit_tail13):
337 add $13, %rax
338 jmp L(StartStrcpyPart)
339
340 .p2align 4
341L(exit_tail14):
342 add $14, %rax
343 jmp L(StartStrcpyPart)
344
/* No jump needed: L(StartStrcpyPart) follows directly.  */
345 .p2align 4
346L(exit_tail15):
347 add $15, %rax
348
/* Start of the copy.
   In:  %rdi = destination, %rsi = source, %rax = offset of the
        destination's NUL from %rdi (and %r8 = the value saved from
        %rdx at entry when USE_AS_STRNCAT is defined).
   Sets %rcx = source and %rdx = %rdi + %rax, the address where the
   first copied byte is stored.  */
349 .p2align 4
350L(StartStrcpyPart):
351 mov %rsi, %rcx
352 lea (%rdi, %rax), %rdx
353# ifdef USE_AS_STRNCAT
/* %r8 == 0: nothing to append.  %r8 <= 8 (unsigned): use the path that
   checks the limit before every source byte it tests.  */
354 test %r8, %r8
355 jz L(StrncatExit0)
356 cmp $8, %r8
357 jbe L(StrncatExit8Bytes)
358# endif
/* Look for the source NUL in bytes 0..8.  L(ExitN) copies N bytes, the
   last of which is the NUL that was found, and returns.  */
359 cmpb $0, (%rcx)
360 jz L(Exit1)
361 cmpb $0, 1(%rcx)
362 jz L(Exit2)
363 cmpb $0, 2(%rcx)
364 jz L(Exit3)
365 cmpb $0, 3(%rcx)
366 jz L(Exit4)
367 cmpb $0, 4(%rcx)
368 jz L(Exit5)
369 cmpb $0, 5(%rcx)
370 jz L(Exit6)
371 cmpb $0, 6(%rcx)
372 jz L(Exit7)
373 cmpb $0, 7(%rcx)
374 jz L(Exit8)
375 cmpb $0, 8(%rcx)
376 jz L(Exit9)
377# ifdef USE_AS_STRNCAT
/* Nine source bytes are known to be non-zero and %r8 > 8 here.  With
   %r8 < 16 the remaining bytes are handled with per-byte limit checks.  */
378 cmp $16, %r8
379 jb L(StrncatExit15Bytes)
380# endif
381 cmpb $0, 9(%rcx)
382 jz L(Exit10)
383 cmpb $0, 10(%rcx)
384 jz L(Exit11)
385 cmpb $0, 11(%rcx)
386 jz L(Exit12)
387 cmpb $0, 12(%rcx)
388 jz L(Exit13)
389 cmpb $0, 13(%rcx)
390 jz L(Exit14)
391 cmpb $0, 14(%rcx)
392 jz L(Exit15)
393 cmpb $0, 15(%rcx)
394 jz L(Exit16)
395# ifdef USE_AS_STRNCAT
/* No NUL in the first 16 source bytes.  If %r8 is exactly 16, copy 16
   bytes and append the terminator.  */
396 cmp $16, %r8
397 je L(StrncatExit16)
/* NOTE(review): USE_AS_STRNCPY is defined right before the include and
   also guards the Case2/Case3 code later in this file; presumably it
   selects the length-limited variant of the included code -- confirm
   against strcpy-ssse3.S.  */
398# define USE_AS_STRNCPY
399# endif
400
/* The copy code for longer sources comes from the included file, which
   is not visible here.  */
401# include "strcpy-ssse3.S"
402
/* Copy the final 1..16 bytes of the source and return %rdi.
   No instruction visible in this file jumps here.
   NOTE(review): presumably this is a target used by the included
   strcpy-ssse3.S, with %rsi holding a byte offset that still has to be
   added to %rdx and %rcx, and %ax holding a pmovmskb mask of the NUL
   position -- confirm against that file.
   The lowest set bit of %ax selects the tail: bit N-1 -> L(ExitN).  */
403 .p2align 4
404L(CopyFrom1To16Bytes):
405 add %rsi, %rdx
406 add %rsi, %rcx
407
/* %al == 0: the lowest set bit is in %ah.  */
408 test %al, %al
409 jz L(ExitHigh)
410 test $0x01, %al
411 jnz L(Exit1)
412 test $0x02, %al
413 jnz L(Exit2)
414 test $0x04, %al
415 jnz L(Exit3)
416 test $0x08, %al
417 jnz L(Exit4)
418 test $0x10, %al
419 jnz L(Exit5)
420 test $0x20, %al
421 jnz L(Exit6)
422 test $0x40, %al
423 jnz L(Exit7)
/* %al is non-zero with bits 0..6 clear, so bit 7 is set: copy 8 bytes.  */
424 movlpd (%rcx), %xmm0
425 movlpd %xmm0, (%rdx)
426 mov %rdi, %rax
427 ret
428
/* Same dispatch on %ah: bit N-9 -> L(ExitN) for N = 9..15.  */
429 .p2align 4
430L(ExitHigh):
431 test $0x01, %ah
432 jnz L(Exit9)
433 test $0x02, %ah
434 jnz L(Exit10)
435 test $0x04, %ah
436 jnz L(Exit11)
437 test $0x08, %ah
438 jnz L(Exit12)
439 test $0x10, %ah
440 jnz L(Exit13)
441 test $0x20, %ah
442 jnz L(Exit14)
443 test $0x40, %ah
444 jnz L(Exit15)
/* None of bits 0..6 of %ah is set: copy all 16 bytes.  */
445 movlpd (%rcx), %xmm0
446 movlpd 8(%rcx), %xmm1
447 movlpd %xmm0, (%rdx)
448 movlpd %xmm1, 8(%rdx)
449 mov %rdi, %rax
450 ret
451
/* Fixed-size copy tails for 1..8 bytes.
   L(ExitN):        copy exactly N bytes from (%rcx) to (%rdx) and
                    return with %rax = %rdi.
   L(StrncatExitN): store a zero byte at N(%rdx) first, then fall
                    through into L(ExitN).
   %ah is cleared only to have a zero byte to store; %rax is reloaded
   from %rdi before every ret.  */
452 .p2align 4
453L(StrncatExit1):
454 xor %ah, %ah
455 movb %ah, 1(%rdx)
456L(Exit1):
457 movb (%rcx), %al
458 movb %al, (%rdx)
459 mov %rdi, %rax
460 ret
461
462 .p2align 4
463L(StrncatExit2):
464 xor %ah, %ah
465 movb %ah, 2(%rdx)
466L(Exit2):
467 movw (%rcx), %ax
468 movw %ax, (%rdx)
469 mov %rdi, %rax
470 ret
471
472 .p2align 4
473L(StrncatExit3):
474 xor %ah, %ah
475 movb %ah, 3(%rdx)
476L(Exit3):
/* 3 bytes = 2 + 1.  */
477 movw (%rcx), %ax
478 movw %ax, (%rdx)
479 movb 2(%rcx), %al
480 movb %al, 2(%rdx)
481 mov %rdi, %rax
482 ret
483
484 .p2align 4
485L(StrncatExit4):
486 xor %ah, %ah
487 movb %ah, 4(%rdx)
488L(Exit4):
489 mov (%rcx), %eax
490 mov %eax, (%rdx)
491 mov %rdi, %rax
492 ret
493
494 .p2align 4
495L(StrncatExit5):
496 xor %ah, %ah
497 movb %ah, 5(%rdx)
498L(Exit5):
/* 5 bytes = 4 + 1.  */
499 mov (%rcx), %eax
500 mov %eax, (%rdx)
501 movb 4(%rcx), %al
502 movb %al, 4(%rdx)
503 mov %rdi, %rax
504 ret
505
506 .p2align 4
507L(StrncatExit6):
508 xor %ah, %ah
509 movb %ah, 6(%rdx)
510L(Exit6):
/* 6 bytes = 4 + 2.  */
511 mov (%rcx), %eax
512 mov %eax, (%rdx)
513 movw 4(%rcx), %ax
514 movw %ax, 4(%rdx)
515 mov %rdi, %rax
516 ret
517
518 .p2align 4
519L(StrncatExit7):
520 xor %ah, %ah
521 movb %ah, 7(%rdx)
522L(Exit7):
/* 7 bytes: two 4-byte moves at offsets 0 and 3, overlapping by one
   byte.  */
523 mov (%rcx), %eax
524 mov %eax, (%rdx)
525 mov 3(%rcx), %eax
526 mov %eax, 3(%rdx)
527 mov %rdi, %rax
528 ret
529
530 .p2align 4
531L(StrncatExit8):
532 xor %ah, %ah
533 movb %ah, 8(%rdx)
534L(Exit8):
/* 8 bytes through the low half of %xmm0.  */
535 movlpd (%rcx), %xmm0
536 movlpd %xmm0, (%rdx)
537 mov %rdi, %rax
538 ret
539
/* Fixed-size copy tails for 9..16 bytes.
   L(ExitN):        copy exactly N bytes from (%rcx) to (%rdx) and
                    return with %rax = %rdi.
   L(StrncatExitN): store a zero byte at N(%rdx) first, then fall
                    through into L(ExitN).
   Every tail starts with an 8-byte move through %xmm0; the remainder
   is a second move that may overlap the first.  */
540 .p2align 4
541L(StrncatExit9):
542 xor %ah, %ah
543 movb %ah, 9(%rdx)
544L(Exit9):
/* 9 bytes = 8 + 1.  */
545 movlpd (%rcx), %xmm0
546 movlpd %xmm0, (%rdx)
547 movb 8(%rcx), %al
548 movb %al, 8(%rdx)
549 mov %rdi, %rax
550 ret
551
552 .p2align 4
553L(StrncatExit10):
554 xor %ah, %ah
555 movb %ah, 10(%rdx)
556L(Exit10):
/* 10 bytes = 8 + 2.  */
557 movlpd (%rcx), %xmm0
558 movlpd %xmm0, (%rdx)
559 movw 8(%rcx), %ax
560 movw %ax, 8(%rdx)
561 mov %rdi, %rax
562 ret
563
564 .p2align 4
565L(StrncatExit11):
566 xor %ah, %ah
567 movb %ah, 11(%rdx)
568L(Exit11):
/* 11 bytes: 8 bytes at offset 0 plus 4 bytes at offset 7 (one byte of
   overlap).  */
569 movlpd (%rcx), %xmm0
570 movlpd %xmm0, (%rdx)
571 mov 7(%rcx), %eax
572 mov %eax, 7(%rdx)
573 mov %rdi, %rax
574 ret
575
576 .p2align 4
577L(StrncatExit12):
578 xor %ah, %ah
579 movb %ah, 12(%rdx)
580L(Exit12):
/* 12 bytes = 8 + 4.  */
581 movlpd (%rcx), %xmm0
582 movlpd %xmm0, (%rdx)
583 mov 8(%rcx), %eax
584 mov %eax, 8(%rdx)
585 mov %rdi, %rax
586 ret
587
588 .p2align 4
589L(StrncatExit13):
590 xor %ah, %ah
591 movb %ah, 13(%rdx)
592L(Exit13):
/* 13 bytes: two 8-byte moves at offsets 0 and 5.  */
593 movlpd (%rcx), %xmm0
594 movlpd %xmm0, (%rdx)
595 movlpd 5(%rcx), %xmm1
596 movlpd %xmm1, 5(%rdx)
597 mov %rdi, %rax
598 ret
599
600 .p2align 4
601L(StrncatExit14):
602 xor %ah, %ah
603 movb %ah, 14(%rdx)
604L(Exit14):
/* 14 bytes: two 8-byte moves at offsets 0 and 6.  */
605 movlpd (%rcx), %xmm0
606 movlpd %xmm0, (%rdx)
607 movlpd 6(%rcx), %xmm1
608 movlpd %xmm1, 6(%rdx)
609 mov %rdi, %rax
610 ret
611
612 .p2align 4
613L(StrncatExit15):
614 xor %ah, %ah
615 movb %ah, 15(%rdx)
616L(Exit15):
/* 15 bytes: two 8-byte moves at offsets 0 and 7.  */
617 movlpd (%rcx), %xmm0
618 movlpd %xmm0, (%rdx)
619 movlpd 7(%rcx), %xmm1
620 movlpd %xmm1, 7(%rdx)
621 mov %rdi, %rax
622 ret
623
624 .p2align 4
625L(StrncatExit16):
626 xor %ah, %ah
627 movb %ah, 16(%rdx)
628L(Exit16):
/* 16 bytes = 8 + 8; both loads are done before the first store.  */
629 movlpd (%rcx), %xmm0
630 movlpd 8(%rcx), %xmm1
631 movlpd %xmm0, (%rdx)
632 movlpd %xmm1, 8(%rdx)
633 mov %rdi, %rax
634 ret
635
636# ifdef USE_AS_STRNCPY
637
/* Length-limited tail where the last block contains a NUL (%rax != 0
   when reached through L(CopyFrom1To16BytesCase2OrCase3)).
   NOTE(review): on entry %rsi appears to be a byte offset to add to
   %rcx and %rdx, %ax a pmovmskb mask of the NUL position and %r8 the
   remaining limit minus 16 -- the code that sets these up is in the
   included strcpy-ssse3.S; confirm there.
   For each N the NUL bit is tested before the limit: a NUL at byte N-1
   goes to L(ExitN) (copy N bytes including the NUL), a limit of exactly
   N goes to L(StrncatExitN) (copy N bytes and append a NUL).  */
638 .p2align 4
639L(CopyFrom1To16BytesCase2):
640 add $16, %r8
641 add %rsi, %rcx
/* %rsi temporarily holds the destination address so that %rdx can be
   used as scratch.  %dh is 0x80 when bit 15 of %r8 - 9 is set (true
   when %r8 < 9, assuming %r8 is in 1..16 here -- TODO confirm) and 0
   otherwise.  OR-ing in %al makes %dh zero only if the NUL is not in
   the low eight bytes and %r8 >= 9.  The lea that restores %rdx does
   not modify the flags set by the test.  */
642 lea (%rsi, %rdx), %rsi
643 lea -9(%r8), %rdx
644 and $1<<7, %dh
645 or %al, %dh
646 test %dh, %dh
647 lea (%rsi), %rdx
648 jz L(ExitHighCase2)
649
650 test $0x01, %al
651 jnz L(Exit1)
652 cmp $1, %r8
653 je L(StrncatExit1)
654 test $0x02, %al
655 jnz L(Exit2)
656 cmp $2, %r8
657 je L(StrncatExit2)
658 test $0x04, %al
659 jnz L(Exit3)
660 cmp $3, %r8
661 je L(StrncatExit3)
662 test $0x08, %al
663 jnz L(Exit4)
664 cmp $4, %r8
665 je L(StrncatExit4)
666 test $0x10, %al
667 jnz L(Exit5)
668 cmp $5, %r8
669 je L(StrncatExit5)
670 test $0x20, %al
671 jnz L(Exit6)
672 cmp $6, %r8
673 je L(StrncatExit6)
674 test $0x40, %al
675 jnz L(Exit7)
676 cmp $7, %r8
677 je L(StrncatExit7)
/* Copy 8 bytes, then terminate.  cmpb $1 sets the carry flag exactly
   when the byte at 7(%rdx) is 0, and sbb $-1 computes
   %rax + 1 - carry: the zero is stored at 7(%rdx) if that byte is
   already the NUL, otherwise at 8(%rdx).  */
678 movlpd (%rcx), %xmm0
679 movlpd %xmm0, (%rdx)
680 lea 7(%rdx), %rax
681 cmpb $1, (%rax)
682 sbb $-1, %rax
683 xor %cl, %cl
684 movb %cl, (%rax)
685 mov %rdi, %rax
686 ret
687
/* Same interleaved tests for bytes 8..14, using %ah.  */
688 .p2align 4
689L(ExitHighCase2):
690 test $0x01, %ah
691 jnz L(Exit9)
692 cmp $9, %r8
693 je L(StrncatExit9)
694 test $0x02, %ah
695 jnz L(Exit10)
696 cmp $10, %r8
697 je L(StrncatExit10)
698 test $0x04, %ah
699 jnz L(Exit11)
700 cmp $11, %r8
701 je L(StrncatExit11)
702 test $0x8, %ah
703 jnz L(Exit12)
704 cmp $12, %r8
705 je L(StrncatExit12)
706 test $0x10, %ah
707 jnz L(Exit13)
708 cmp $13, %r8
709 je L(StrncatExit13)
710 test $0x20, %ah
711 jnz L(Exit14)
712 cmp $14, %r8
713 je L(StrncatExit14)
714 test $0x40, %ah
715 jnz L(Exit15)
716 cmp $15, %r8
717 je L(StrncatExit15)
/* Copy 16 bytes.  No separate terminator is stored on this path.
   NOTE(review): by elimination the NUL should be byte 15 of the block
   (bit 7 of %ah), which the copy includes -- confirm that %ax is
   always non-zero here.  */
718 movlpd (%rcx), %xmm0
719 movlpd %xmm0, (%rdx)
720 movlpd 8(%rcx), %xmm1
721 movlpd %xmm1, 8(%rdx)
722 mov %rdi, %rax
723 ret
724
/* Dispatch on %rax: non-zero goes to Case2 (the tail that tests NUL
   bits), zero falls through into Case3 (no NUL bit to test; the tail
   length is decided by %r8 alone).
   NOTE(review): neither label is referenced from the code visible in
   this file; presumably both are targets used by the included
   strcpy-ssse3.S -- confirm there.  */
725L(CopyFrom1To16BytesCase2OrCase3):
726 test %rax, %rax
727 jnz L(CopyFrom1To16BytesCase2)
728
/* Tail decided by the limit only.  After the add, %r8 selects how many
   bytes are copied; every path stores a terminating zero byte right
   after the copied bytes and returns %rdi.  */
729 .p2align 4
730L(CopyFrom1To16BytesCase3):
731 add $16, %r8
732 add %rsi, %rdx
733 add %rsi, %rcx
734
/* Unsigned compare: values above 8 are handled by L(ExitHighCase3).  */
735 cmp $8, %r8
736 ja L(ExitHighCase3)
737 cmp $1, %r8
738 je L(StrncatExit1)
739 cmp $2, %r8
740 je L(StrncatExit2)
741 cmp $3, %r8
742 je L(StrncatExit3)
743 cmp $4, %r8
744 je L(StrncatExit4)
745 cmp $5, %r8
746 je L(StrncatExit5)
747 cmp $6, %r8
748 je L(StrncatExit6)
749 cmp $7, %r8
750 je L(StrncatExit7)
/* None of 1..7 matched: copy 8 bytes and store the terminator at
   8(%rdx).  */
751 movlpd (%rcx), %xmm0
752 movlpd %xmm0, (%rdx)
753 xor %ah, %ah
754 movb %ah, 8(%rdx)
755 mov %rdi, %rax
756 ret
757
758 .p2align 4
759L(ExitHighCase3):
760 cmp $9, %r8
761 je L(StrncatExit9)
762 cmp $10, %r8
763 je L(StrncatExit10)
764 cmp $11, %r8
765 je L(StrncatExit11)
766 cmp $12, %r8
767 je L(StrncatExit12)
768 cmp $13, %r8
769 je L(StrncatExit13)
770 cmp $14, %r8
771 je L(StrncatExit14)
772 cmp $15, %r8
773 je L(StrncatExit15)
/* None of 9..15 matched: copy 16 bytes and store the terminator at
   16(%rdx).  */
774 movlpd (%rcx), %xmm0
775 movlpd %xmm0, (%rdx)
776 movlpd 8(%rcx), %xmm1
777 movlpd %xmm1, 8(%rdx)
778 xor %ah, %ah
779 movb %ah, 16(%rdx)
780 mov %rdi, %rax
781 ret
782
/* Reached from L(StartStrcpyPart) when %r8 is 0: store nothing and
   return the destination pointer.  */
783 .p2align 4
784L(StrncatExit0):
785 mov %rdi, %rax
786 ret
787
/* Reached from L(StartStrcpyPart) when source bytes 0..8 are non-zero
   and 8 < %r8 < 16.  The limit is checked before each further source
   byte is tested:
     %r8 == N      -> L(StrncatExitN): copy N bytes and append a NUL;
     byte N is NUL -> L(Exit(N+1)):    copy N+1 bytes, NUL included.  */
788 .p2align 4
789L(StrncatExit15Bytes):
790 cmp $9, %r8
791 je L(StrncatExit9)
792 cmpb $0, 9(%rcx)
793 jz L(Exit10)
794 cmp $10, %r8
795 je L(StrncatExit10)
796 cmpb $0, 10(%rcx)
797 jz L(Exit11)
798 cmp $11, %r8
799 je L(StrncatExit11)
800 cmpb $0, 11(%rcx)
801 jz L(Exit12)
802 cmp $12, %r8
803 je L(StrncatExit12)
804 cmpb $0, 12(%rcx)
805 jz L(Exit13)
806 cmp $13, %r8
807 je L(StrncatExit13)
808 cmpb $0, 13(%rcx)
809 jz L(Exit14)
810 cmp $14, %r8
811 je L(StrncatExit14)
/* Only %r8 == 15 is left and source bytes 0..13 are non-zero.  Copy 15
   bytes with two 8-byte moves at offsets 0 and 7, then terminate.
   cmpb $1 sets the carry flag exactly when the byte at 14(%rdx) is 0,
   and sbb $-1 computes %rax + 1 - carry: the zero is stored at
   14(%rdx) if that byte is already the NUL, otherwise at 15(%rdx).  */
812 movlpd (%rcx), %xmm0
813 movlpd %xmm0, (%rdx)
814 movlpd 7(%rcx), %xmm1
815 movlpd %xmm1, 7(%rdx)
816 lea 14(%rdx), %rax
817 cmpb $1, (%rax)
818 sbb $-1, %rax
819 xor %cl, %cl
820 movb %cl, (%rax)
821 mov %rdi, %rax
822 ret
823
/* Reached from L(StartStrcpyPart) when 1 <= %r8 <= 8.  Each source byte
   is tested before the limit is compared:
     byte N-1 is NUL -> L(ExitN):        copy N bytes, NUL included;
     %r8 == N        -> L(StrncatExitN): copy N bytes and append a NUL.  */
824 .p2align 4
825L(StrncatExit8Bytes):
826 cmpb $0, (%rcx)
827 jz L(Exit1)
828 cmp $1, %r8
829 je L(StrncatExit1)
830 cmpb $0, 1(%rcx)
831 jz L(Exit2)
832 cmp $2, %r8
833 je L(StrncatExit2)
834 cmpb $0, 2(%rcx)
835 jz L(Exit3)
836 cmp $3, %r8
837 je L(StrncatExit3)
838 cmpb $0, 3(%rcx)
839 jz L(Exit4)
840 cmp $4, %r8
841 je L(StrncatExit4)
842 cmpb $0, 4(%rcx)
843 jz L(Exit5)
844 cmp $5, %r8
845 je L(StrncatExit5)
846 cmpb $0, 5(%rcx)
847 jz L(Exit6)
848 cmp $6, %r8
849 je L(StrncatExit6)
850 cmpb $0, 6(%rcx)
851 jz L(Exit7)
852 cmp $7, %r8
853 je L(StrncatExit7)
/* Only %r8 == 8 is left and source bytes 0..6 are non-zero.  Copy 8
   bytes, then terminate.  cmpb $1 sets the carry flag exactly when the
   byte at 7(%rdx) is 0, and sbb $-1 computes %rax + 1 - carry: the
   zero is stored at 7(%rdx) if that byte is already the NUL, otherwise
   at 8(%rdx).  */
854 movlpd (%rcx), %xmm0
855 movlpd %xmm0, (%rdx)
856 lea 7(%rdx), %rax
857 cmpb $1, (%rax)
858 sbb $-1, %rax
859 xor %cl, %cl
860 movb %cl, (%rax)
861 mov %rdi, %rax
862 ret
863
864# endif
865END (STRCAT)
866#endif
867

source code of glibc/sysdeps/x86_64/multiarch/strcat-ssse3.S