1/* strcat with SSE2
2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19
20#if IS_IN (libc)
21
22# include <sysdep.h>
23
24
/* CFI annotations that accompany a 4-byte push of REG: the CFA offset
   grows by 4 and REG is recorded with cfi_rel_offset (REG, 0).  */
25# define CFI_PUSH(REG) \
26 cfi_adjust_cfa_offset (4); \
27 cfi_rel_offset (REG, 0)
28
/* CFI annotations that accompany the matching 4-byte pop of REG: the CFA
   offset shrinks by 4 and REG is marked restored.  */
29# define CFI_POP(REG) \
30 cfi_adjust_cfa_offset (-4); \
31 cfi_restore (REG)
32
/* pushl / popl of REG followed by the corresponding CFI annotations.  */
33# define PUSH(REG) pushl REG; CFI_PUSH (REG)
34# define POP(REG) popl REG; CFI_POP (REG)
35
/* Jump-table support.  With PIC defined a table entry is the distance of
   the target I from the table base B; otherwise it is the target I
   itself.  BRANCH_TO_JMPTBL_ENTRY performs the matching indirect jump.  */
36# ifdef PIC
37# define JMPTBL(I, B) I - B
38
39/* Load an entry in a jump table into ECX and branch to it. TABLE is a
40 jump table with relative offsets. INDEX is a register that contains the
41 index into the jump table. SCALE is the scale of INDEX. ECX is
   overwritten, so INDEX must not be ECX.  */
42
43# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
44 /* We first load PC into ECX. */ \
45 SETUP_PIC_REG(cx); \
46 /* Get the address of the jump table. */ \
47 addl $(TABLE - .), %ecx; \
48 /* Get the entry and convert the relative offset to the \
49 absolute address. */ \
50 addl (%ecx,INDEX,SCALE), %ecx; \
51 /* We loaded the jump table and adjusted ECX. Go. */ \
52 jmp *%ecx
53# else
54# define JMPTBL(I, B) I
55
56/* Branch to an entry in a jump table. TABLE is a jump table with
57 absolute offsets. INDEX is a register that contains the index into the
58 jump table. SCALE is the scale of INDEX. No register is modified.  */
59
60# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
61 jmp *TABLE(,INDEX,SCALE)
62# endif
63
/* Name of the function defined below, unless STRCAT is already defined
   when this point is reached.  */
64# ifndef STRCAT
65# define STRCAT __strcat_sse2
66# endif
67
/* %esp-relative offsets of the arguments as they are used right after the
   PUSH (%esi) at function entry: STR1 and STR2 are PARMS and PARMS+4
   shifted by the 4 bytes of the saved %esi.  PARMS is presumably the
   offset of the first argument past the return address -- confirm against
   the i386 calling convention.  */
68# define PARMS 4
69# define STR1 PARMS+4
70# define STR2 STR1+4
71
/* The strncat build also pushes %ebx before LEN is read, hence the extra
   4 bytes in LEN.  STR3 is the offset used by the exit paths to reload
   the value returned in %eax; it addresses the same slot as STR1 once
   every register saved by the prologue is on the stack.  */
72# ifdef USE_AS_STRNCAT
73# define LEN STR2+8
74# define STR3 STR1+4
75# else
76# define STR3 STR1
77# endif
78
/* RETURN pops the registers saved by the prologue and returns.  The
   CFI_PUSH annotations after the ret re-declare those registers as saved
   for the code that follows the expansion, which is reached with them
   still on the stack.  */
79# define USE_AS_STRCAT
80# ifdef USE_AS_STRNCAT
81# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
82# else
83# define RETURN POP(%esi); ret; CFI_PUSH(%esi);
84# endif
85
/* STRCAT: append the string at STR2 to the string at STR1; with
   USE_AS_STRNCAT defined a third argument LEN bounds the copy.  Every
   exit path reloads the STR1 argument into %eax before returning.

   Registers in this prologue:
     %eax  STR1, then the pointer used to scan for the NUL ending it
     %esi  STR2 (saved with PUSH and restored by RETURN)
     %ebx  LEN (strncat build only; saved and restored likewise)
     %ecx, %edx, %xmm0-%xmm7  scratch  */
86.text
87ENTRY (STRCAT)
88 PUSH (%esi)
89 mov STR1(%esp), %eax
90 mov STR2(%esp), %esi
91# ifdef USE_AS_STRNCAT
92 PUSH (%ebx)
93 movl LEN(%esp), %ebx
94 test %ebx, %ebx
/* LEN == 0: nothing to append; %eax already holds STR1.  */
95 jz L(ExitZero)
96# endif
/* The flags set by this cmpb are consumed by the jz three lines below;
   the two mov instructions in between do not change them.  */
97 cmpb $0, (%esi)
98 mov %esi, %ecx
99 mov %eax, %edx
/* First byte at STR2 is NUL: nothing to append.  */
100 jz L(ExitZero)
101
/* %ecx / %edx = offset of STR2 / STR1 within its 64-byte block.  */
102 and $63, %ecx
103 and $63, %edx
/* The fast path below reads 32 bytes at STR2 with unaligned loads.  With
   an offset of at most 32 those bytes stay inside one 64-byte block;
   otherwise take the path that inspects STR2 with aligned loads first.  */
104 cmp $32, %ecx
105 ja L(StrlenCore7_1)
/* Likewise the unaligned 16-byte load at STR1 stays inside one 64-byte
   block only if the offset is at most 48.  */
106 cmp $48, %edx
107 ja L(alignment_prolog)
108
/* Fast path: %xmm1 = first 16 bytes at STR1, %xmm5/%xmm6 = first 32 bytes
   at STR2.  After the compares %xmm0, %xmm4 and %xmm7 hold 0xff in every
   byte position where %xmm1, %xmm5 and %xmm6 respectively are NUL.  */
109 pxor %xmm0, %xmm0
110 pxor %xmm4, %xmm4
111 pxor %xmm7, %xmm7
112 movdqu (%eax), %xmm1
113 movdqu (%esi), %xmm5
114 pcmpeqb %xmm1, %xmm0
115 movdqu 16(%esi), %xmm6
116 pmovmskb %xmm0, %ecx
117 pcmpeqb %xmm5, %xmm4
118 pcmpeqb %xmm6, %xmm7
119 test %ecx, %ecx
/* NUL within the first 16 bytes at STR1; %ecx is its bit mask.  */
120 jnz L(exit_less16_)
/* No NUL found, so %xmm0 is still all zero.  Continue the search from
   the 16-byte aligned address at or below STR1.  */
121 mov %eax, %ecx
122 and $-16, %eax
123 jmp L(loop_prolog)
124
/* Reached when the offset of STR1 within its 64-byte block is above 48
   (%edx holds that offset).  The aligned 16-byte block containing STR1 is
   tested instead of doing an unaligned load.  As on the fast path,
   %xmm5/%xmm6 receive the first 32 bytes at STR2 and %xmm4/%xmm7 their
   NUL masks.  */
125L(alignment_prolog):
126 pxor %xmm0, %xmm0
127 pxor %xmm4, %xmm4
128 mov %edx, %ecx
129 pxor %xmm7, %xmm7
/* %ecx = STR1 & 15, %eax = STR1 rounded down to a multiple of 16.  */
130 and $15, %ecx
131 and $-16, %eax
132 pcmpeqb (%eax), %xmm0
133 movdqu (%esi), %xmm5
134 movdqu 16(%esi), %xmm6
135 pmovmskb %xmm0, %edx
136 pcmpeqb %xmm5, %xmm4
/* Discard the mask bits of the bytes that precede STR1 in the block.  */
137 shr %cl, %edx
138 pcmpeqb %xmm6, %xmm7
139 test %edx, %edx
/* NUL at or after STR1 inside this block: %eax = block, %ecx = STR1 & 15,
   %edx = shifted mask.  */
140 jnz L(exit_less16)
141 add %eax, %ecx
142
/* %xmm0 may carry matches from bytes before STR1; clear it so the loop
   below starts with an all-zero comparand.  */
143 pxor %xmm0, %xmm0
/* Search for the NUL ending the STR1 string, 64 bytes per iteration.
   %eax is 16-byte aligned and the block at (%eax) has been tested
   already.  %xmm0-%xmm3 are zero on entry to every iteration: each one is
   overwritten by a compare result, which is all zero whenever the loop
   continues.  */
144L(loop_prolog):
145 pxor %xmm1, %xmm1
146 pxor %xmm2, %xmm2
147 pxor %xmm3, %xmm3
148 .p2align 4
149L(align16_loop):
150 pcmpeqb 16(%eax), %xmm0
151 pmovmskb %xmm0, %edx
152 test %edx, %edx
153 jnz L(exit16)
154
155 pcmpeqb 32(%eax), %xmm1
156 pmovmskb %xmm1, %edx
157 test %edx, %edx
158 jnz L(exit32)
159
160 pcmpeqb 48(%eax), %xmm2
161 pmovmskb %xmm2, %edx
162 test %edx, %edx
163 jnz L(exit48)
164
165 pcmpeqb 64(%eax), %xmm3
166 pmovmskb %xmm3, %edx
/* lea advances %eax by 64 without touching the flags.  */
167 lea 64(%eax), %eax
168 test %edx, %edx
169 jz L(align16_loop)
/* NUL in the fourth block; %eax already points at that block.  */
170 bsf %edx, %edx
171 add %edx, %eax
172 jmp L(StartStrcpyPart)
173
/* Exits of the search above.  Each one leaves %eax pointing at the NUL
   byte found and continues at L(StartStrcpyPart).  %edx (or %ecx for
   L(exit_less16_)) is the NUL bit mask of the block in which it was
   found.  */
174 .p2align 4
175L(exit16):
176 bsf %edx, %edx
177 lea 16(%eax, %edx), %eax
178 jmp L(StartStrcpyPart)
179
180 .p2align 4
181L(exit32):
182 bsf %edx, %edx
183 lea 32(%eax, %edx), %eax
184 jmp L(StartStrcpyPart)
185
186 .p2align 4
187L(exit48):
188 bsf %edx, %edx
189 lea 48(%eax, %edx), %eax
190 jmp L(StartStrcpyPart)
191
/* From L(alignment_prolog): %eax = aligned block, %ecx = STR1 & 15,
   %edx = mask already shifted right by %ecx.  */
192 .p2align 4
193L(exit_less16):
194 bsf %edx, %edx
195 add %ecx, %eax
196 add %edx, %eax
197 jmp L(StartStrcpyPart)
198
/* From the fast path: %eax = STR1, %ecx = mask of the unaligned load.
   Falls through into L(StartStrcpyPart).  */
199 .p2align 4
200L(exit_less16_):
201 bsf %ecx, %ecx
202 add %ecx, %eax
203
/* Start of the copy for the paths that preloaded the source.
   On entry: %eax = address of the NUL ending the STR1 string,
   %esi = STR2, %xmm5/%xmm6 = first 32 bytes at STR2, %xmm4/%xmm7 = their
   NUL masks, %ebx = LEN (strncat build).  */
204 .p2align 4
205L(StartStrcpyPart):
206 pmovmskb %xmm4, %edx
207# ifdef USE_AS_STRNCAT
/* At most 16 bytes may be copied: let the length-aware exit decide.  */
208 cmp $16, %ebx
209 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
210# endif
211 test %edx, %edx
/* Source ends within its first 16 bytes.  */
212 jnz L(CopyFrom1To16BytesTail1)
213
/* First 16 source bytes contain no NUL: store them and look at the
   second 16.  */
214 movdqu %xmm5, (%eax)
215 pmovmskb %xmm7, %edx
216# ifdef USE_AS_STRNCAT
217 cmp $32, %ebx
218 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
219# endif
220 test %edx, %edx
221 jnz L(CopyFrom1To32Bytes1)
222
/* No NUL in the first 32 source bytes.  Round %esi down to a multiple of
   16 (%ecx = the bytes dropped) and move %eax back by the same amount so
   that equal offsets from %esi and %eax still address matching bytes.  */
223 mov %esi, %ecx
224 and $-16, %esi
225 and $15, %ecx
226 pxor %xmm0, %xmm0
227# ifdef USE_AS_STRNCAT
/* Count the length from the aligned %esi as well.  If the add carries,
   sbb yields -1 and the or saturates %ebx at 0xffffffff.  */
228 add %ecx, %ebx
229 sbb %edx, %edx
230 or %edx, %ebx
231# endif
232 sub %ecx, %eax
233 jmp L(Unalign16Both)
234
/* Search for the end of the STR1 string without preloading the source;
   reached when the offset of STR2 within its 64-byte block is above 32.
   The aligned 16-byte block containing STR1 is tested first.  */
235L(StrlenCore7_1):
236 mov %eax, %ecx
237 pxor %xmm0, %xmm0
/* %ecx = STR1 & 15, %eax = STR1 rounded down to a multiple of 16.  */
238 and $15, %ecx
239 and $-16, %eax
240 pcmpeqb (%eax), %xmm0
241 pmovmskb %xmm0, %edx
/* Discard the mask bits of the bytes that precede STR1 in the block.  */
242 shr %cl, %edx
243 test %edx, %edx
244 jnz L(exit_less16_1)
245 add %eax, %ecx
246
/* Zero comparands for the loop below (%xmm0 may carry matches from bytes
   before STR1).  */
247 pxor %xmm0, %xmm0
248 pxor %xmm1, %xmm1
249 pxor %xmm2, %xmm2
250 pxor %xmm3, %xmm3
251
/* Same 64-bytes-per-iteration NUL search as L(align16_loop), for the
   path that continues at L(StartStrcpyPart_1).  %eax is 16-byte aligned
   and %xmm0-%xmm3 are zero at the top of every iteration.  */
252 .p2align 4
253L(align16_loop_1):
254 pcmpeqb 16(%eax), %xmm0
255 pmovmskb %xmm0, %edx
256 test %edx, %edx
257 jnz L(exit16_1)
258
259 pcmpeqb 32(%eax), %xmm1
260 pmovmskb %xmm1, %edx
261 test %edx, %edx
262 jnz L(exit32_1)
263
264 pcmpeqb 48(%eax), %xmm2
265 pmovmskb %xmm2, %edx
266 test %edx, %edx
267 jnz L(exit48_1)
268
269 pcmpeqb 64(%eax), %xmm3
270 pmovmskb %xmm3, %edx
/* lea advances %eax by 64 without touching the flags.  */
271 lea 64(%eax), %eax
272 test %edx, %edx
273 jz L(align16_loop_1)
/* NUL in the fourth block; %eax already points at that block.  */
274 bsf %edx, %edx
275 add %edx, %eax
276 jmp L(StartStrcpyPart_1)
277
/* Exits of L(align16_loop_1) and L(StrlenCore7_1).  Each one leaves %eax
   pointing at the NUL byte found and continues at L(StartStrcpyPart_1).
   %edx is the NUL bit mask of the block in which it was found.  */
278 .p2align 4
279L(exit16_1):
280 bsf %edx, %edx
281 lea 16(%eax, %edx), %eax
282 jmp L(StartStrcpyPart_1)
283
284 .p2align 4
285L(exit32_1):
286 bsf %edx, %edx
287 lea 32(%eax, %edx), %eax
288 jmp L(StartStrcpyPart_1)
289
290 .p2align 4
291L(exit48_1):
292 bsf %edx, %edx
293 lea 48(%eax, %edx), %eax
294 jmp L(StartStrcpyPart_1)
295
/* From L(StrlenCore7_1): %eax = aligned block, %ecx = STR1 & 15,
   %edx = mask already shifted right by %ecx.  Falls through into
   L(StartStrcpyPart_1).  */
296 .p2align 4
297L(exit_less16_1):
298 bsf %edx, %edx
299 add %ecx, %eax
300 add %edx, %eax
301
/* Start of the copy for the path that did not preload the source.
   On entry: %eax = address of the NUL ending the STR1 string,
   %esi = STR2, %ebx = LEN (strncat build).  The source is inspected with
   aligned loads: %ecx = STR2 & 15 and %esi is rounded down to 16.  */
302 .p2align 4
303L(StartStrcpyPart_1):
304 mov %esi, %ecx
305 and $15, %ecx
306 and $-16, %esi
307 pxor %xmm0, %xmm0
308 pxor %xmm1, %xmm1
309
310# ifdef USE_AS_STRNCAT
/* With LEN above 48 the first blocks need no length checks.  */
311 cmp $48, %ebx
312 ja L(BigN)
313# endif
314 pcmpeqb (%esi), %xmm1
315# ifdef USE_AS_STRNCAT
/* From here on %ebx counts from the aligned %esi.  */
316 add %ecx, %ebx
317# endif
318 pmovmskb %xmm1, %edx
/* Discard the mask bits of the bytes that precede STR2 in the block.  */
319 shr %cl, %edx
320# ifdef USE_AS_STRNCAT
321 cmp $16, %ebx
322 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
323# endif
324 test %edx, %edx
/* Source ends inside the first aligned block.  */
325 jnz L(CopyFrom1To16BytesTail)
326
327 pcmpeqb 16(%esi), %xmm0
328 pmovmskb %xmm0, %edx
329# ifdef USE_AS_STRNCAT
330 cmp $32, %ebx
331 jbe L(CopyFrom1To32BytesCase2OrCase3)
332# endif
333 test %edx, %edx
/* Source ends inside the second aligned block.  */
334 jnz L(CopyFrom1To32Bytes)
335
/* (%esi, %ecx) is the original STR2.  After the store %eax is moved back
   by %ecx so that equal offsets from %esi and %eax address matching
   bytes.  */
336 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
337 movdqu %xmm1, (%eax)
338 sub %ecx, %eax
339
/* Unrolled copy with aligned source loads and unaligned stores.
   On entry: %esi is 16-byte aligned, %eax is the matching destination
   pointer, %xmm0 is zero, and the block at 16(%esi) is known to contain
   no NUL (the code leading here tested those bytes).
   Each step loads the next 16 source bytes, stores the block loaded by
   the previous step, tests the new block for NUL and adds 16 to %ecx.
   %xmm0 receives the compare result and therefore stays zero for as long
   as the sequence continues.  On a branch to L(CopyFrom1To16Bytes),
   %ecx is the offset of the block that contains the NUL, which has not
   been stored yet.  In the strncat build %ebx is decremented at each
   step and a result that is zero or borrows diverts to the length-aware
   exit.  */
340 .p2align 4
341L(Unalign16Both):
342 mov $16, %ecx
343 movdqa (%esi, %ecx), %xmm1
344 movaps 16(%esi, %ecx), %xmm2
345 movdqu %xmm1, (%eax, %ecx)
346 pcmpeqb %xmm2, %xmm0
347 pmovmskb %xmm0, %edx
348 add $16, %ecx
349# ifdef USE_AS_STRNCAT
350 sub $48, %ebx
351 jbe L(CopyFrom1To16BytesCase2OrCase3)
352# endif
353 test %edx, %edx
354 jnz L(CopyFrom1To16Bytes)
/* L(BigN) joins here after doing the first step itself.  */
355L(Unalign16BothBigN):
356 movaps 16(%esi, %ecx), %xmm3
357 movdqu %xmm2, (%eax, %ecx)
358 pcmpeqb %xmm3, %xmm0
359 pmovmskb %xmm0, %edx
360 add $16, %ecx
361# ifdef USE_AS_STRNCAT
362 sub $16, %ebx
363 jbe L(CopyFrom1To16BytesCase2OrCase3)
364# endif
365 test %edx, %edx
366 jnz L(CopyFrom1To16Bytes)
367
368 movaps 16(%esi, %ecx), %xmm4
369 movdqu %xmm3, (%eax, %ecx)
370 pcmpeqb %xmm4, %xmm0
371 pmovmskb %xmm0, %edx
372 add $16, %ecx
373# ifdef USE_AS_STRNCAT
374 sub $16, %ebx
375 jbe L(CopyFrom1To16BytesCase2OrCase3)
376# endif
377 test %edx, %edx
378 jnz L(CopyFrom1To16Bytes)
379
380 movaps 16(%esi, %ecx), %xmm1
381 movdqu %xmm4, (%eax, %ecx)
382 pcmpeqb %xmm1, %xmm0
383 pmovmskb %xmm0, %edx
384 add $16, %ecx
385# ifdef USE_AS_STRNCAT
386 sub $16, %ebx
387 jbe L(CopyFrom1To16BytesCase2OrCase3)
388# endif
389 test %edx, %edx
390 jnz L(CopyFrom1To16Bytes)
391
392 movaps 16(%esi, %ecx), %xmm2
393 movdqu %xmm1, (%eax, %ecx)
394 pcmpeqb %xmm2, %xmm0
395 pmovmskb %xmm0, %edx
396 add $16, %ecx
397# ifdef USE_AS_STRNCAT
398 sub $16, %ebx
399 jbe L(CopyFrom1To16BytesCase2OrCase3)
400# endif
401 test %edx, %edx
402 jnz L(CopyFrom1To16Bytes)
403
404 movaps 16(%esi, %ecx), %xmm3
405 movdqu %xmm2, (%eax, %ecx)
406 pcmpeqb %xmm3, %xmm0
407 pmovmskb %xmm0, %edx
408 add $16, %ecx
409# ifdef USE_AS_STRNCAT
410 sub $16, %ebx
411 jbe L(CopyFrom1To16BytesCase2OrCase3)
412# endif
413 test %edx, %edx
414 jnz L(CopyFrom1To16Bytes)
415
/* Switch from 16-byte steps to the 64-byte loop.  Store the last block
   loaded above, then set %esi to (%esi + %ecx + 16) rounded down to a
   multiple of 64.  %edx = old %esi - new %esi, and subtracting it from
   %eax moves the destination by the same distance as the source.  */
416 movdqu %xmm3, (%eax, %ecx)
417 mov %esi, %edx
418 lea 16(%esi, %ecx), %esi
419 and $-0x40, %esi
420 sub %esi, %edx
421 sub %edx, %eax
422# ifdef USE_AS_STRNCAT
/* Re-base the remaining length on the new %esi.  NOTE(review): the
   constant 128 presumably undoes the 48 + 5 * 16 subtracted from %ebx by
   the unrolled steps -- confirm.  */
423 lea 128(%ebx, %edx), %ebx
424# endif
/* Load 64 source bytes into %xmm4 (copy in %xmm2), %xmm5, %xmm6 (copy in
   %xmm3) and %xmm7.  The byte-wise minimum of all four has a zero byte
   exactly when one of them does, so one compare against the zero in
   %xmm0 tests the whole 64 bytes.  */
425 movaps (%esi), %xmm2
426 movaps %xmm2, %xmm4
427 movaps 16(%esi), %xmm5
428 movaps 32(%esi), %xmm3
429 movaps %xmm3, %xmm6
430 movaps 48(%esi), %xmm7
431 pminub %xmm5, %xmm2
432 pminub %xmm7, %xmm3
433 pminub %xmm2, %xmm3
434 pcmpeqb %xmm0, %xmm3
435 pmovmskb %xmm3, %edx
436# ifdef USE_AS_STRNCAT
437 sub $64, %ebx
438 jbe L(UnalignedLeaveCase2OrCase3)
439# endif
440 test %edx, %edx
441 jnz L(Unaligned64Leave)
442
/* Main loop, 64 bytes per iteration.  At the top %xmm4-%xmm7 hold 64
   source bytes already known to contain no NUL.  They are stored while
   the next 64 bytes are loaded into the same registers and tested with
   the pminub reduction against the zero in %xmm0.  */
443 .p2align 4
444L(Unaligned64Loop_start):
445 add $64, %eax
446 add $64, %esi
447 movdqu %xmm4, -64(%eax)
448 movaps (%esi), %xmm2
449 movdqa %xmm2, %xmm4
450 movdqu %xmm5, -48(%eax)
451 movaps 16(%esi), %xmm5
452 pminub %xmm5, %xmm2
453 movaps 32(%esi), %xmm3
454 movdqu %xmm6, -32(%eax)
455 movaps %xmm3, %xmm6
456 movdqu %xmm7, -16(%eax)
457 movaps 48(%esi), %xmm7
458 pminub %xmm7, %xmm3
459 pminub %xmm2, %xmm3
460 pcmpeqb %xmm0, %xmm3
461 pmovmskb %xmm3, %edx
462# ifdef USE_AS_STRNCAT
/* Length limit falls within these 64 bytes.  */
463 sub $64, %ebx
464 jbe L(UnalignedLeaveCase2OrCase3)
465# endif
466 test %edx, %edx
467 jz L(Unaligned64Loop_start)
468
/* A NUL lies somewhere in the 64 bytes held in %xmm4-%xmm7, none of which
   has been stored yet; %esi/%eax address the start of that chunk.  Find
   the 16-byte block that contains it, store the blocks before it, and
   leave the final block to the ExitTable routine selected by the NUL's
   bit index.  %xmm0 is zero on entry and %xmm1 is cleared here; each is
   reused as comparand only after its previous result was found to be
   zero.  */
469L(Unaligned64Leave):
470 pxor %xmm1, %xmm1
471
472 pcmpeqb %xmm4, %xmm0
473 pcmpeqb %xmm5, %xmm1
474 pmovmskb %xmm0, %edx
475 pmovmskb %xmm1, %ecx
476 test %edx, %edx
477 jnz L(CopyFrom1To16BytesUnaligned_0)
478 test %ecx, %ecx
479 jnz L(CopyFrom1To16BytesUnaligned_16)
480
481 pcmpeqb %xmm6, %xmm0
482 pcmpeqb %xmm7, %xmm1
483 pmovmskb %xmm0, %edx
484 pmovmskb %xmm1, %ecx
485 test %edx, %edx
486 jnz L(CopyFrom1To16BytesUnaligned_32)
487
/* NUL is in the fourth block: store the first three and advance both
   pointers to the fourth.  */
488 bsf %ecx, %edx
489 movdqu %xmm4, (%eax)
490 movdqu %xmm5, 16(%eax)
491 movdqu %xmm6, 32(%eax)
492 add $48, %esi
493 add $48, %eax
494 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
495
/* strncat build only: continuation of L(StartStrcpyPart_1) when LEN is
   above 48.  The same two aligned source blocks are tested, but without
   length checks.  On entry %esi is the aligned source, %ecx = STR2 & 15,
   %xmm0 and %xmm1 are zero.  */
496# ifdef USE_AS_STRNCAT
497 .p2align 4
498L(BigN):
499 pcmpeqb (%esi), %xmm1
500 pmovmskb %xmm1, %edx
/* Discard the mask bits of the bytes that precede STR2 in the block.  */
501 shr %cl, %edx
502 test %edx, %edx
503 jnz L(CopyFrom1To16BytesTail)
504
505 pcmpeqb 16(%esi), %xmm0
506 pmovmskb %xmm0, %edx
507 test %edx, %edx
508 jnz L(CopyFrom1To32Bytes)
509
510 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
511 movdqu %xmm1, (%eax)
512 sub %ecx, %eax
/* Same length bookkeeping that the other path does with add %ecx at
   L(StartStrcpyPart_1) and sub $48 at L(Unalign16Both).  */
513 sub $48, %ebx
514 add %ecx, %ebx
515
/* First step of L(Unalign16Both), done inline without a length check.  */
516 mov $16, %ecx
517 movdqa (%esi, %ecx), %xmm1
518 movaps 16(%esi, %ecx), %xmm2
519 movdqu %xmm1, (%eax, %ecx)
520 pcmpeqb %xmm2, %xmm0
521 pmovmskb %xmm0, %edx
522 add $16, %ecx
523 test %edx, %edx
524 jnz L(CopyFrom1To16Bytes)
525 jmp L(Unalign16BothBigN)
526# endif
527
528/*------------end of main part-------------------------------*/
529
530/* Case1: a NUL was found and no length limit applies.  Each routine makes
   %esi/%eax point at the first byte not copied yet and sets %edx to the
   offset of the NUL from there.  Entry k of ExitTable is Exit(k+1), so
   the bytes up to and including the NUL are copied.  */
531 .p2align 4
/* %ecx = offset of the block holding the NUL, %edx = its NUL mask.  */
532L(CopyFrom1To16Bytes):
533 add %ecx, %eax
534 add %ecx, %esi
535 bsf %edx, %edx
536 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
537
/* From L(StartStrcpyPart_1) / L(BigN): %esi is the aligned source,
   %ecx = STR2 & 15, %edx = mask already shifted by %ecx, %eax unchanged.  */
538 .p2align 4
539L(CopyFrom1To16BytesTail):
540 add %ecx, %esi
541 bsf %edx, %edx
542 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
543
/* From L(StartStrcpyPart): the first 16 bytes are already stored, so
   skip them; %edx is the mask of the second 16 bytes.  */
544 .p2align 4
545L(CopyFrom1To32Bytes1):
546 add $16, %esi
547 add $16, %eax
/* %edx is the mask of the 16 bytes at %esi.  */
548L(CopyFrom1To16BytesTail1):
549 bsf %edx, %edx
550 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
551
/* NUL in the second aligned source block and nothing stored yet:
   %esi becomes STR2 again and %edx = 16 + bit index - (STR2 & 15), the
   offset of the NUL from STR2.  */
552 .p2align 4
553L(CopyFrom1To32Bytes):
554 bsf %edx, %edx
555 add %ecx, %esi
556 add $16, %edx
557 sub %ecx, %edx
558 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
559
/* From L(Unaligned64Leave): NUL in the first, second or third block of
   the pending 64 bytes.  Blocks before it are stored from %xmm4/%xmm5.  */
560 .p2align 4
561L(CopyFrom1To16BytesUnaligned_0):
562 bsf %edx, %edx
563 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
564
565 .p2align 4
566L(CopyFrom1To16BytesUnaligned_16):
567 bsf %ecx, %edx
568 movdqu %xmm4, (%eax)
569 add $16, %esi
570 add $16, %eax
571 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
572
573 .p2align 4
574L(CopyFrom1To16BytesUnaligned_32):
575 bsf %edx, %edx
576 movdqu %xmm4, (%eax)
577 movdqu %xmm5, 16(%eax)
578 add $32, %esi
579 add $32, %eax
580 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
581
/* strncat build only: exits taken when the length limit is reached.
   Two tables are used.  ExitTable is indexed by the offset of the NUL in
   %edx and copies through the NUL.  ExitStrncatTable is indexed by the
   byte count in %ebx: entry k is StrncatExit k, which copies k bytes and
   stores a zero byte after them.  */
582# ifdef USE_AS_STRNCAT
583
/* Common tail for "the NUL comes before the limit": %edx = its offset.  */
584 .p2align 4
585L(CopyFrom1To16BytesExit):
586 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
587
588/* Case2: the current block contains a NUL and the limit is reached in
   it too.  After the adjustments %ebx is the number of bytes that may
   still be copied and %edx the offset of the NUL, both counted from
   %esi; whichever comes first decides the exit.  */
589
590 .p2align 4
/* %ecx = offset of the current block.  The add of 16 turns the result of
   the preceding subtraction into the count left for this block.  */
591L(CopyFrom1To16BytesCase2):
592 add $16, %ebx
593 add %ecx, %eax
594 add %ecx, %esi
595 bsf %edx, %edx
596 cmp %ebx, %edx
597 jb L(CopyFrom1To16BytesExit)
598 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
599
/* From L(StartStrcpyPart_1), second aligned block: %ecx = STR2 & 15 and
   %ebx counts from the aligned %esi, so %ecx is taken off again.  %edx
   becomes the offset of the NUL from STR2.  */
600 .p2align 4
601L(CopyFrom1To32BytesCase2):
602 sub %ecx, %ebx
603 add %ecx, %esi
604 bsf %edx, %edx
605 add $16, %edx
606 sub %ecx, %edx
607 cmp %ebx, %edx
608 jb L(CopyFrom1To16BytesExit)
609 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
610
/* From L(StartStrcpyPart_1), first aligned block; %edx is already
   shifted by %ecx.  */
611L(CopyFrom1To16BytesTailCase2):
612 sub %ecx, %ebx
613 add %ecx, %esi
614 bsf %edx, %edx
615 cmp %ebx, %edx
616 jb L(CopyFrom1To16BytesExit)
617 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
618
/* From L(StartStrcpyPart): %esi, %eax and %ebx need no adjustment.  */
619L(CopyFrom1To16BytesTail1Case2):
620 bsf %edx, %edx
621 cmp %ebx, %edx
622 jb L(CopyFrom1To16BytesExit)
623 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
624
625/* Case2 or Case3: the limit is reached in the current block.  A non-zero
   mask in %edx means the block also holds a NUL (Case2); otherwise
   (Case3) %ebx bytes are copied and terminated.  */
626
627 .p2align 4
628L(CopyFrom1To16BytesCase2OrCase3):
629 test %edx, %edx
630 jnz L(CopyFrom1To16BytesCase2)
631L(CopyFrom1To16BytesCase3):
632 add $16, %ebx
633 add %ecx, %eax
634 add %ecx, %esi
635 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
636
637 .p2align 4
638L(CopyFrom1To32BytesCase2OrCase3):
639 test %edx, %edx
640 jnz L(CopyFrom1To32BytesCase2)
641 sub %ecx, %ebx
642 add %ecx, %esi
643 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
644
645 .p2align 4
646L(CopyFrom1To16BytesTailCase2OrCase3):
647 test %edx, %edx
648 jnz L(CopyFrom1To16BytesTailCase2)
649 sub %ecx, %ebx
650 add %ecx, %esi
651 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
652
/* From L(StartStrcpyPart) after the first 16 bytes were stored: skip
   them in both pointers and in the count.  */
653 .p2align 4
654L(CopyFrom1To32Bytes1Case2OrCase3):
655 add $16, %eax
656 add $16, %esi
657 sub $16, %ebx
658L(CopyFrom1To16BytesTail1Case2OrCase3):
659 test %edx, %edx
660 jnz L(CopyFrom1To16BytesTail1Case2)
661 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
662
663# endif
664
/* Exit routines, reached only through the jump tables.
   L(ExitN) copies N bytes from (%esi) to (%eax), using overlapping moves
   for sizes that are not a power of two, reloads the return value from
   STR3(%esp) and returns.
   L(StrncatExitN) (strncat build) first stores %bh at N(%eax) and then
   falls into L(ExitN).  %ebx is the table index there, so %bh is zero
   and the store writes the terminating NUL.
   Some odd sizes store %dh as their last byte.  In the strcat build
   %edx is the ExitTable index (N - 1), so %dh is zero; that last byte is
   the NUL.  In the strncat build L(ExitN) is also entered from
   L(StrncatExitN) with %edx not set up that way, so %dh is loaded from
   the source first.  */
665# ifdef USE_AS_STRNCAT
666 .p2align 4
/* Nothing to copy: just write the NUL at (%eax).  */
667L(StrncatExit0):
668 movb %bh, (%eax)
669 mov STR3(%esp), %eax
670 RETURN
671# endif
672
673 .p2align 4
674# ifdef USE_AS_STRNCAT
675L(StrncatExit1):
676 movb %bh, 1(%eax)
677# endif
678L(Exit1):
679# ifdef USE_AS_STRNCAT
680 movb (%esi), %dh
681# endif
682 movb %dh, (%eax)
683 mov STR3(%esp), %eax
684 RETURN
685
686 .p2align 4
687# ifdef USE_AS_STRNCAT
688L(StrncatExit2):
689 movb %bh, 2(%eax)
690# endif
691L(Exit2):
692 movw (%esi), %dx
693 movw %dx, (%eax)
694 mov STR3(%esp), %eax
695 RETURN
696
697 .p2align 4
698# ifdef USE_AS_STRNCAT
699L(StrncatExit3):
700 movb %bh, 3(%eax)
701# endif
702L(Exit3):
703 movw (%esi), %cx
704 movw %cx, (%eax)
705# ifdef USE_AS_STRNCAT
706 movb 2(%esi), %dh
707# endif
708 movb %dh, 2(%eax)
709 mov STR3(%esp), %eax
710 RETURN
711
712 .p2align 4
713# ifdef USE_AS_STRNCAT
714L(StrncatExit4):
715 movb %bh, 4(%eax)
716# endif
717L(Exit4):
718 movl (%esi), %edx
719 movl %edx, (%eax)
720 mov STR3(%esp), %eax
721 RETURN
722
723 .p2align 4
724# ifdef USE_AS_STRNCAT
725L(StrncatExit5):
726 movb %bh, 5(%eax)
727# endif
728L(Exit5):
729 movl (%esi), %ecx
730# ifdef USE_AS_STRNCAT
731 movb 4(%esi), %dh
732# endif
733 movb %dh, 4(%eax)
734 movl %ecx, (%eax)
735 mov STR3(%esp), %eax
736 RETURN
737
738 .p2align 4
739# ifdef USE_AS_STRNCAT
740L(StrncatExit6):
741 movb %bh, 6(%eax)
742# endif
743L(Exit6):
744 movl (%esi), %ecx
745 movw 4(%esi), %dx
746 movl %ecx, (%eax)
747 movw %dx, 4(%eax)
748 mov STR3(%esp), %eax
749 RETURN
750
751 .p2align 4
752# ifdef USE_AS_STRNCAT
753L(StrncatExit7):
754 movb %bh, 7(%eax)
755# endif
/* Bytes 0-3 and 3-6: two 4-byte moves overlapping by one byte.  */
756L(Exit7):
757 movl (%esi), %ecx
758 movl 3(%esi), %edx
759 movl %ecx, (%eax)
760 movl %edx, 3(%eax)
761 mov STR3(%esp), %eax
762 RETURN
763
764 .p2align 4
765# ifdef USE_AS_STRNCAT
766L(StrncatExit8):
767 movb %bh, 8(%eax)
768# endif
769L(Exit8):
770 movlpd (%esi), %xmm0
771 movlpd %xmm0, (%eax)
772 mov STR3(%esp), %eax
773 RETURN
774
/* Exit routines for 9 to 16 bytes.  L(ExitN) copies N bytes from (%esi)
   to (%eax) with an 8-byte movlpd plus a second, possibly overlapping,
   move (a single movdqu for 16), then returns STR3(%esp).
   L(StrncatExitN) stores the zero in %bh at N(%eax) first.  */
775 .p2align 4
776# ifdef USE_AS_STRNCAT
777L(StrncatExit9):
778 movb %bh, 9(%eax)
779# endif
780L(Exit9):
781 movlpd (%esi), %xmm0
782# ifdef USE_AS_STRNCAT
783 movb 8(%esi), %dh
784# endif
/* In the strcat build %edx is the table index 8, so %dh is zero.  */
785 movb %dh, 8(%eax)
786 movlpd %xmm0, (%eax)
787 mov STR3(%esp), %eax
788 RETURN
789
790 .p2align 4
791# ifdef USE_AS_STRNCAT
792L(StrncatExit10):
793 movb %bh, 10(%eax)
794# endif
795L(Exit10):
796 movlpd (%esi), %xmm0
797 movw 8(%esi), %dx
798 movlpd %xmm0, (%eax)
799 movw %dx, 8(%eax)
800 mov STR3(%esp), %eax
801 RETURN
802
803 .p2align 4
804# ifdef USE_AS_STRNCAT
805L(StrncatExit11):
806 movb %bh, 11(%eax)
807# endif
808L(Exit11):
809 movlpd (%esi), %xmm0
810 movl 7(%esi), %edx
811 movlpd %xmm0, (%eax)
812 movl %edx, 7(%eax)
813 mov STR3(%esp), %eax
814 RETURN
815
816 .p2align 4
817# ifdef USE_AS_STRNCAT
818L(StrncatExit12):
819 movb %bh, 12(%eax)
820# endif
821L(Exit12):
822 movlpd (%esi), %xmm0
823 movl 8(%esi), %edx
824 movlpd %xmm0, (%eax)
825 movl %edx, 8(%eax)
826 mov STR3(%esp), %eax
827 RETURN
828
829 .p2align 4
830# ifdef USE_AS_STRNCAT
831L(StrncatExit13):
832 movb %bh, 13(%eax)
833# endif
834L(Exit13):
835 movlpd (%esi), %xmm0
836 movlpd 5(%esi), %xmm1
837 movlpd %xmm0, (%eax)
838 movlpd %xmm1, 5(%eax)
839 mov STR3(%esp), %eax
840 RETURN
841
842 .p2align 4
843# ifdef USE_AS_STRNCAT
844L(StrncatExit14):
845 movb %bh, 14(%eax)
846# endif
847L(Exit14):
848 movlpd (%esi), %xmm0
849 movlpd 6(%esi), %xmm1
850 movlpd %xmm0, (%eax)
851 movlpd %xmm1, 6(%eax)
852 mov STR3(%esp), %eax
853 RETURN
854
855 .p2align 4
856# ifdef USE_AS_STRNCAT
857L(StrncatExit15):
858 movb %bh, 15(%eax)
859# endif
860L(Exit15):
861 movlpd (%esi), %xmm0
862 movlpd 7(%esi), %xmm1
863 movlpd %xmm0, (%eax)
864 movlpd %xmm1, 7(%eax)
865 mov STR3(%esp), %eax
866 RETURN
867
868 .p2align 4
869# ifdef USE_AS_STRNCAT
870L(StrncatExit16):
871 movb %bh, 16(%eax)
872# endif
873L(Exit16):
874 movdqu (%esi), %xmm0
875 movdqu %xmm0, (%eax)
876 mov STR3(%esp), %eax
877 RETURN
878
/* Exit routines for 17 to 24 bytes.  L(ExitN) copies the first 16 bytes
   with movdqu and the rest with a second, possibly overlapping, move,
   then returns STR3(%esp).  L(StrncatExitN) stores the zero in %bh at
   N(%eax) first.  */
879 .p2align 4
880# ifdef USE_AS_STRNCAT
881L(StrncatExit17):
882 movb %bh, 17(%eax)
883# endif
884L(Exit17):
885 movdqu (%esi), %xmm0
886# ifdef USE_AS_STRNCAT
887 movb 16(%esi), %dh
888# endif
889 movdqu %xmm0, (%eax)
/* In the strcat build %edx is the table index 16, so %dh is zero.  */
890 movb %dh, 16(%eax)
891 mov STR3(%esp), %eax
892 RETURN
893
894 .p2align 4
895# ifdef USE_AS_STRNCAT
896L(StrncatExit18):
897 movb %bh, 18(%eax)
898# endif
899L(Exit18):
900 movdqu (%esi), %xmm0
901 movw 16(%esi), %cx
902 movdqu %xmm0, (%eax)
903 movw %cx, 16(%eax)
904 mov STR3(%esp), %eax
905 RETURN
906
907 .p2align 4
908# ifdef USE_AS_STRNCAT
909L(StrncatExit19):
910 movb %bh, 19(%eax)
911# endif
912L(Exit19):
913 movdqu (%esi), %xmm0
914 movl 15(%esi), %ecx
915 movdqu %xmm0, (%eax)
916 movl %ecx, 15(%eax)
917 mov STR3(%esp), %eax
918 RETURN
919
920 .p2align 4
921# ifdef USE_AS_STRNCAT
922L(StrncatExit20):
923 movb %bh, 20(%eax)
924# endif
925L(Exit20):
926 movdqu (%esi), %xmm0
927 movl 16(%esi), %ecx
928 movdqu %xmm0, (%eax)
929 movl %ecx, 16(%eax)
930 mov STR3(%esp), %eax
931 RETURN
932
933 .p2align 4
934# ifdef USE_AS_STRNCAT
935L(StrncatExit21):
936 movb %bh, 21(%eax)
937# endif
938L(Exit21):
939 movdqu (%esi), %xmm0
940 movl 16(%esi), %ecx
941# ifdef USE_AS_STRNCAT
942 movb 20(%esi), %dh
943# endif
944 movdqu %xmm0, (%eax)
945 movl %ecx, 16(%eax)
/* In the strcat build %edx is the table index 20, so %dh is zero.  */
946 movb %dh, 20(%eax)
947 mov STR3(%esp), %eax
948 RETURN
949
950 .p2align 4
951# ifdef USE_AS_STRNCAT
952L(StrncatExit22):
953 movb %bh, 22(%eax)
954# endif
955L(Exit22):
956 movdqu (%esi), %xmm0
957 movlpd 14(%esi), %xmm3
958 movdqu %xmm0, (%eax)
959 movlpd %xmm3, 14(%eax)
960 mov STR3(%esp), %eax
961 RETURN
962
963 .p2align 4
964# ifdef USE_AS_STRNCAT
965L(StrncatExit23):
966 movb %bh, 23(%eax)
967# endif
968L(Exit23):
969 movdqu (%esi), %xmm0
970 movlpd 15(%esi), %xmm3
971 movdqu %xmm0, (%eax)
972 movlpd %xmm3, 15(%eax)
973 mov STR3(%esp), %eax
974 RETURN
975
976 .p2align 4
977# ifdef USE_AS_STRNCAT
978L(StrncatExit24):
979 movb %bh, 24(%eax)
980# endif
981L(Exit24):
982 movdqu (%esi), %xmm0
983 movlpd 16(%esi), %xmm2
984 movdqu %xmm0, (%eax)
985 movlpd %xmm2, 16(%eax)
986 mov STR3(%esp), %eax
987 RETURN
988
/* Exit routines for 25 to 32 bytes.  L(ExitN) copies 16 bytes with
   movdqu and the remainder with further, possibly overlapping, moves,
   then returns STR3(%esp).  L(StrncatExitN) stores the zero in %bh at
   N(%eax) first.  */
989 .p2align 4
990# ifdef USE_AS_STRNCAT
991L(StrncatExit25):
992 movb %bh, 25(%eax)
993# endif
994L(Exit25):
995 movdqu (%esi), %xmm0
996 movlpd 16(%esi), %xmm2
997# ifdef USE_AS_STRNCAT
998 movb 24(%esi), %dh
999# endif
1000 movdqu %xmm0, (%eax)
1001 movlpd %xmm2, 16(%eax)
/* In the strcat build %edx is the table index 24, so %dh is zero.  */
1002 movb %dh, 24(%eax)
1003 mov STR3(%esp), %eax
1004 RETURN
1005
1006 .p2align 4
1007# ifdef USE_AS_STRNCAT
1008L(StrncatExit26):
1009 movb %bh, 26(%eax)
1010# endif
1011L(Exit26):
1012 movdqu (%esi), %xmm0
1013 movlpd 16(%esi), %xmm2
1014 movw 24(%esi), %cx
1015 movdqu %xmm0, (%eax)
1016 movlpd %xmm2, 16(%eax)
1017 movw %cx, 24(%eax)
1018 mov STR3(%esp), %eax
1019 RETURN
1020
1021 .p2align 4
1022# ifdef USE_AS_STRNCAT
1023L(StrncatExit27):
1024 movb %bh, 27(%eax)
1025# endif
1026L(Exit27):
1027 movdqu (%esi), %xmm0
1028 movlpd 16(%esi), %xmm2
1029 movl 23(%esi), %ecx
1030 movdqu %xmm0, (%eax)
1031 movlpd %xmm2, 16(%eax)
1032 movl %ecx, 23(%eax)
1033 mov STR3(%esp), %eax
1034 RETURN
1035
1036 .p2align 4
1037# ifdef USE_AS_STRNCAT
1038L(StrncatExit28):
1039 movb %bh, 28(%eax)
1040# endif
1041L(Exit28):
1042 movdqu (%esi), %xmm0
1043 movlpd 16(%esi), %xmm2
1044 movl 24(%esi), %ecx
1045 movdqu %xmm0, (%eax)
1046 movlpd %xmm2, 16(%eax)
1047 movl %ecx, 24(%eax)
1048 mov STR3(%esp), %eax
1049 RETURN
1050
1051 .p2align 4
1052# ifdef USE_AS_STRNCAT
1053L(StrncatExit29):
1054 movb %bh, 29(%eax)
1055# endif
/* Two 16-byte moves overlapping by three bytes.  */
1056L(Exit29):
1057 movdqu (%esi), %xmm0
1058 movdqu 13(%esi), %xmm2
1059 movdqu %xmm0, (%eax)
1060 movdqu %xmm2, 13(%eax)
1061 mov STR3(%esp), %eax
1062 RETURN
1063
1064 .p2align 4
1065# ifdef USE_AS_STRNCAT
1066L(StrncatExit30):
1067 movb %bh, 30(%eax)
1068# endif
1069L(Exit30):
1070 movdqu (%esi), %xmm0
1071 movdqu 14(%esi), %xmm2
1072 movdqu %xmm0, (%eax)
1073 movdqu %xmm2, 14(%eax)
1074 mov STR3(%esp), %eax
1075 RETURN
1076
1077 .p2align 4
1078# ifdef USE_AS_STRNCAT
1079L(StrncatExit31):
1080 movb %bh, 31(%eax)
1081# endif
1082L(Exit31):
1083 movdqu (%esi), %xmm0
1084 movdqu 15(%esi), %xmm2
1085 movdqu %xmm0, (%eax)
1086 movdqu %xmm2, 15(%eax)
1087 mov STR3(%esp), %eax
1088 RETURN
1089
1090 .p2align 4
1091# ifdef USE_AS_STRNCAT
1092L(StrncatExit32):
1093 movb %bh, 32(%eax)
1094# endif
1095L(Exit32):
1096 movdqu (%esi), %xmm0
1097 movdqu 16(%esi), %xmm2
1098 movdqu %xmm0, (%eax)
1099 movdqu %xmm2, 16(%eax)
1100 mov STR3(%esp), %eax
1101 RETURN
1102
/* strncat build only: the 64-byte loop reached the length limit.
   %xmm4-%xmm7 hold the pending 64 source bytes (not stored yet),
   %esi/%eax address that chunk, %ebx is the result of the sub $64 that
   branched here and %edx is the NUL mask of the pminub reduction.  */
1103# ifdef USE_AS_STRNCAT
1104
1105 .p2align 4
1106L(UnalignedLeaveCase2OrCase3):
1107 test %edx, %edx
1108 jnz L(Unaligned64LeaveCase2)
/* Case3: no NUL in the chunk.  %ecx = (%ebx + 64) & -16, a multiple of
   16 used by L(CopyFrom1To16BytesCase3) as block offset.  Blocks are
   stored one by one while %ebx is stepped down by 16; the first
   subtraction that borrows hands the partial block to
   L(CopyFrom1To16BytesCase3).  */
1109L(Unaligned64LeaveCase3):
1110 lea 64(%ebx), %ecx
1111 and $-16, %ecx
1112 add $48, %ebx
1113 jl L(CopyFrom1To16BytesCase3)
1114 movdqu %xmm4, (%eax)
1115 sub $16, %ebx
1116 jb L(CopyFrom1To16BytesCase3)
1117 movdqu %xmm5, 16(%eax)
1118 sub $16, %ebx
1119 jb L(CopyFrom1To16BytesCase3)
1120 movdqu %xmm6, 32(%eax)
1121 sub $16, %ebx
1122 jb L(CopyFrom1To16BytesCase3)
/* All four blocks fit: store the last one and a NUL right after it.  */
1123 movdqu %xmm7, 48(%eax)
1124 xor %bh, %bh
1125 movb %bh, 64(%eax)
1126 mov STR3(%esp), %eax
1127 RETURN
1128
/* Case2: the chunk holds a NUL and the limit is reached in it too.  Walk
   the four blocks in order with %ecx as block offset, storing each block
   once it is known to lie before both the NUL and the limit.  %xmm0 is
   zero on entry and stays zero after every compare that finds no NUL.  */
1129 .p2align 4
1130L(Unaligned64LeaveCase2):
1131 xor %ecx, %ecx
1132 pcmpeqb %xmm4, %xmm0
1133 pmovmskb %xmm0, %edx
1134 add $48, %ebx
1135 jle L(CopyFrom1To16BytesCase2OrCase3)
1136 test %edx, %edx
1137 jnz L(CopyFrom1To16Bytes)
1138
1139 pcmpeqb %xmm5, %xmm0
1140 pmovmskb %xmm0, %edx
1141 movdqu %xmm4, (%eax)
1142 add $16, %ecx
1143 sub $16, %ebx
1144 jbe L(CopyFrom1To16BytesCase2OrCase3)
1145 test %edx, %edx
1146 jnz L(CopyFrom1To16Bytes)
1147
1148 pcmpeqb %xmm6, %xmm0
1149 pmovmskb %xmm0, %edx
1150 movdqu %xmm5, 16(%eax)
1151 add $16, %ecx
1152 sub $16, %ebx
1153 jbe L(CopyFrom1To16BytesCase2OrCase3)
1154 test %edx, %edx
1155 jnz L(CopyFrom1To16Bytes)
1156
/* Last block: advance both pointers to it (%ecx + 16) and exit through
   whichever of the NUL offset in %edx and the count in %ebx is
   smaller.  */
1157 pcmpeqb %xmm7, %xmm0
1158 pmovmskb %xmm0, %edx
1159 movdqu %xmm6, 32(%eax)
1160 lea 16(%eax, %ecx), %eax
1161 lea 16(%esi, %ecx), %esi
1162 bsf %edx, %edx
1163 cmp %ebx, %edx
1164 jb L(CopyFrom1To16BytesExit)
1165 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
1166# endif
/* Nothing to append: reached from the prologue when LEN is zero (strncat
   build) or when the first byte at STR2 is NUL.  %eax still holds the
   STR1 value loaded at entry.  */
1167 .p2align 4
1168L(ExitZero):
1169 RETURN
1170
1171END (STRCAT)
1172
/* Jump tables used by BRANCH_TO_JMPTBL_ENTRY.
   L(ExitTable): entry k (k = 0..31) is L(Exit(k+1)).  It is indexed by
   the offset of the terminating NUL, so the routine reached copies the
   bytes up to and including the NUL.
   L(ExitStrncatTable) (strncat build): entry k (k = 0..32) is
   L(StrncatExit k).  It is indexed by a byte count; the routine reached
   copies that many bytes and appends a NUL.
   NOTE(review): the .p2align below comes before the .section directive,
   so it pads the section that is current at that point rather than
   aligning the table inside .rodata -- confirm whether that is
   intended.  */
1173 .p2align 4
1174 .section .rodata
1175L(ExitTable):
1176 .int JMPTBL(L(Exit1), L(ExitTable))
1177 .int JMPTBL(L(Exit2), L(ExitTable))
1178 .int JMPTBL(L(Exit3), L(ExitTable))
1179 .int JMPTBL(L(Exit4), L(ExitTable))
1180 .int JMPTBL(L(Exit5), L(ExitTable))
1181 .int JMPTBL(L(Exit6), L(ExitTable))
1182 .int JMPTBL(L(Exit7), L(ExitTable))
1183 .int JMPTBL(L(Exit8), L(ExitTable))
1184 .int JMPTBL(L(Exit9), L(ExitTable))
1185 .int JMPTBL(L(Exit10), L(ExitTable))
1186 .int JMPTBL(L(Exit11), L(ExitTable))
1187 .int JMPTBL(L(Exit12), L(ExitTable))
1188 .int JMPTBL(L(Exit13), L(ExitTable))
1189 .int JMPTBL(L(Exit14), L(ExitTable))
1190 .int JMPTBL(L(Exit15), L(ExitTable))
1191 .int JMPTBL(L(Exit16), L(ExitTable))
1192 .int JMPTBL(L(Exit17), L(ExitTable))
1193 .int JMPTBL(L(Exit18), L(ExitTable))
1194 .int JMPTBL(L(Exit19), L(ExitTable))
1195 .int JMPTBL(L(Exit20), L(ExitTable))
1196 .int JMPTBL(L(Exit21), L(ExitTable))
1197 .int JMPTBL(L(Exit22), L(ExitTable))
1198 .int JMPTBL(L(Exit23), L(ExitTable))
1199 .int JMPTBL(L(Exit24), L(ExitTable))
1200 .int JMPTBL(L(Exit25), L(ExitTable))
1201 .int JMPTBL(L(Exit26), L(ExitTable))
1202 .int JMPTBL(L(Exit27), L(ExitTable))
1203 .int JMPTBL(L(Exit28), L(ExitTable))
1204 .int JMPTBL(L(Exit29), L(ExitTable))
1205 .int JMPTBL(L(Exit30), L(ExitTable))
1206 .int JMPTBL(L(Exit31), L(ExitTable))
1207 .int JMPTBL(L(Exit32), L(ExitTable))
1208# ifdef USE_AS_STRNCAT
1209L(ExitStrncatTable):
1210 .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
1211 .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
1212 .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
1213 .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
1214 .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
1215 .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
1216 .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
1217 .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
1218 .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
1219 .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
1220 .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
1221 .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
1222 .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
1223 .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
1224 .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
1225 .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
1226 .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
1227 .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
1228 .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
1229 .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
1230 .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
1231 .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
1232 .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
1233 .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
1234 .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
1235 .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
1236 .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
1237 .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
1238 .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
1239 .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
1240 .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
1241 .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
1242 .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
1243# endif
1244#endif
1245

/* Source: glibc/sysdeps/i386/i686/multiarch/strcat-sse2.S */