1/* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19
20#if IS_IN (libc)
21
22# include <sysdep.h>
23
24
/* Unwind-information helpers for the 4-byte pushes and pops done in this
   file.  CFI_PUSH records that the CFA offset grew by 4 and that REG is
   saved at offset 0; CFI_POP records the reverse.  The cfi_* macros are
   not defined in this file (presumably they come from <sysdep.h>).  */
25# define CFI_PUSH(REG) \
26 cfi_adjust_cfa_offset (4); \
27 cfi_rel_offset (REG, 0)
28
29# define CFI_POP(REG) \
30 cfi_adjust_cfa_offset (-4); \
31 cfi_restore (REG)
32
/* PUSH/POP pair the real pushl/popl with the matching CFI annotation.  */
33# define PUSH(REG) pushl REG; CFI_PUSH (REG)
34# define POP(REG) popl REG; CFI_POP (REG)
35
/* Default symbol name for the routine; it is only used when STRCPY has
   not been defined before this point.  */
36# ifndef STRCPY
37# define STRCPY __strcpy_sse2
38# endif
39
/* %esp-relative offsets of the three stack arguments, 4 bytes apart,
   starting at PARMS.  They are used as displacements, e.g.
   STR1(%esp).  */
40# define STR1 PARMS
41# define STR2 STR1+4
42# define LEN STR2+4
43
44# ifdef USE_AS_STRNCPY
/* ENTRANCE saves %ebx, %esi and %edi (three 4-byte pushes).  PARMS is
   16, presumably those 12 bytes plus the 4-byte return address, so that
   PARMS(%esp) addresses the first argument after ENTRANCE.  */
45# define PARMS 16
46# define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
/* RETURN restores the saved registers in reverse order and returns.
   The trailing CFI_PUSH annotations emit no pushl; they re-establish
   the "three registers saved" unwind state for whatever code follows
   the ret inside the same function.  */
47# define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \
48 CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
49
50# ifdef PIC
/* Under PIC a jump table entry is the offset of label I from the table
   base B.  */
51# define JMPTBL(I, B) I - B
52
53/* Load an entry in a jump table into ECX and branch to it. TABLE is a
54 jump table with relative offsets.
55 INDEX is a register that contains the index into the jump table.
56 SCALE is the scale of INDEX. ECX is clobbered, so INDEX must not
 be ECX-dependent after the first instruction. */
57
58# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
59 /* We first load PC into ECX. */ \
60 SETUP_PIC_REG(cx); \
61 /* Get the address of the jump table. */ \
62 addl $(TABLE - .), %ecx; \
63 /* Get the entry and convert the relative offset to the \
64 absolute address. */ \
65 addl (%ecx,INDEX,SCALE), %ecx; \
66 /* We loaded the jump table and adjusted ECX. Go. */ \
67 jmp *%ecx
68# else
/* Without PIC a jump table entry is simply the address of label I.  */
69# define JMPTBL(I, B) I
70
71/* Branch to an entry in a jump table. TABLE is a jump table with
72 absolute offsets. INDEX is a register that contains the index into the
73 jump table. SCALE is the scale of INDEX. */
74
75# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
76 jmp *TABLE(,INDEX,SCALE)
77# endif
78
/* Entry of the length-bounded copy (this part of the file is inside
   "# ifdef USE_AS_STRNCPY").  Register roles set up here:
     %edi = destination (argument STR1)
     %esi = source (argument STR2)
     %ebx = byte count (argument LEN)
     %eax = return value; the destination pointer unless USE_AS_STPCPY
     %ecx = source misalignment (source & 15)
     %edx = bitmask of NUL bytes found in the chunk under test.  */
79.text
80ENTRY (STRCPY)
81 ENTRANCE
82 mov STR1(%esp), %edi
83 mov STR2(%esp), %esi
84 movl LEN(%esp), %ebx
/* A zero length copies nothing.  */
85 test %ebx, %ebx
86 jz L(ExitZero)
87
88 mov %esi, %ecx
89# ifndef USE_AS_STPCPY
90 mov %edi, %eax /* save result */
91# endif
92 and $15, %ecx
93 jz L(SourceStringAlignmentZero)
94
/* Unaligned source: round %esi down to a 16-byte boundary and test the
   whole aligned chunk for NUL.  %ebx is increased by the misalignment so
   that it counts from the aligned address; the shift by %cl drops mask
   bits belonging to bytes before the real start of the string.  */
95 and $-16, %esi
96 pxor %xmm0, %xmm0
97 pxor %xmm1, %xmm1
98
99 pcmpeqb (%esi), %xmm1
100 add %ecx, %ebx
101 pmovmskb %xmm1, %edx
102 shr %cl, %edx
/* If the length limit is small enough to fall in this first chunk, use
   the bounded (Case2/Case3) handler.  The threshold differs by one
   between the USE_AS_STPCPY build and the other build.  */
103# ifdef USE_AS_STPCPY
104 cmp $16, %ebx
105 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
106# else
107 cmp $17, %ebx
108 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
109# endif
110 test %edx, %edx
111 jnz L(CopyFrom1To16BytesTail)
112
/* Same test for the second aligned chunk.  */
113 pcmpeqb 16(%esi), %xmm0
114 pmovmskb %xmm0, %edx
115# ifdef USE_AS_STPCPY
116 cmp $32, %ebx
117 jbe L(CopyFrom1To32BytesCase2OrCase3)
118# else
119 cmp $33, %ebx
120 jbe L(CopyFrom1To32BytesCase2OrCase3)
121# endif
122 test %edx, %edx
123 jnz L(CopyFrom1To32Bytes)
124
/* No NUL and no limit in the first two chunks: copy the first 16 bytes
   of the string (from the original, unaligned source address).  */
125 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
126 movdqu %xmm1, (%edi)
127
/* Bias %edi by the misalignment so that the same offset can be used with
   both the aligned %esi and %edi from here on.  */
128 sub %ecx, %edi
129
130/* 16-byte steps with %esi 16-byte aligned: aligned loads from the source, unaligned stores to the destination. */
/* Each step loads the next source chunk, stores the previously loaded
   one at (%edi,%ecx), tests the new chunk for NUL (mask in %edx; %xmm0
   stays all-zero while no NUL is found), advances %ecx by 16 and
   reduces %ebx.  "jbe" after the subtraction means the length limit
   lies within the bytes examined so far, which is handled by the
   Case2/Case3 code; a non-zero mask with length to spare goes to the
   handler that stores the register holding the NUL.  */
131 .p2align 4
132L(Unalign16Both):
133 mov $16, %ecx
134 movdqa (%esi, %ecx), %xmm1
135 movaps 16(%esi, %ecx), %xmm2
136 movdqu %xmm1, (%edi, %ecx)
137 pcmpeqb %xmm2, %xmm0
138 pmovmskb %xmm0, %edx
139 add $16, %ecx
/* 48 = the three 16-byte chunks examined so far, counted from the
   aligned source address.  */
140 sub $48, %ebx
141 jbe L(CopyFrom1To16BytesCase2OrCase3)
142 test %edx, %edx
143 jnz L(CopyFrom1To16BytesUnalignedXmm2)
144
145 movaps 16(%esi, %ecx), %xmm3
146 movdqu %xmm2, (%edi, %ecx)
147 pcmpeqb %xmm3, %xmm0
148 pmovmskb %xmm0, %edx
149 add $16, %ecx
150 sub $16, %ebx
151 jbe L(CopyFrom1To16BytesCase2OrCase3)
152 test %edx, %edx
153 jnz L(CopyFrom1To16BytesUnalignedXmm3)
154
155 movaps 16(%esi, %ecx), %xmm4
156 movdqu %xmm3, (%edi, %ecx)
157 pcmpeqb %xmm4, %xmm0
158 pmovmskb %xmm0, %edx
159 add $16, %ecx
160 sub $16, %ebx
161 jbe L(CopyFrom1To16BytesCase2OrCase3)
162 test %edx, %edx
163 jnz L(CopyFrom1To16BytesUnalignedXmm4)
164
165 movaps 16(%esi, %ecx), %xmm1
166 movdqu %xmm4, (%edi, %ecx)
167 pcmpeqb %xmm1, %xmm0
168 pmovmskb %xmm0, %edx
169 add $16, %ecx
170 sub $16, %ebx
171 jbe L(CopyFrom1To16BytesCase2OrCase3)
172 test %edx, %edx
173 jnz L(CopyFrom1To16BytesUnalignedXmm1)
174
175 movaps 16(%esi, %ecx), %xmm2
176 movdqu %xmm1, (%edi, %ecx)
177 pcmpeqb %xmm2, %xmm0
178 pmovmskb %xmm0, %edx
179 add $16, %ecx
180 sub $16, %ebx
181 jbe L(CopyFrom1To16BytesCase2OrCase3)
182 test %edx, %edx
183 jnz L(CopyFrom1To16BytesUnalignedXmm2)
184
185 movaps 16(%esi, %ecx), %xmm3
186 movdqu %xmm2, (%edi, %ecx)
187 pcmpeqb %xmm3, %xmm0
188 pmovmskb %xmm0, %edx
189 add $16, %ecx
190 sub $16, %ebx
191 jbe L(CopyFrom1To16BytesCase2OrCase3)
192 test %edx, %edx
193 jnz L(CopyFrom1To16BytesUnalignedXmm3)
194
/* Switch to 64-byte steps: store the last loaded chunk, then move %esi
   to a 64-byte boundary (16(%esi,%ecx) rounded down).  %edx receives
   old %esi minus new %esi; %edi and %ebx are shifted by that same
   displacement so they stay in step with %esi.  The 128 added to %ebx
   presumably compensates for the amounts subtracted above and in the
   loop that follows.  */
195 movdqu %xmm3, (%edi, %ecx)
196 mov %esi, %edx
197 lea 16(%esi, %ecx), %esi
198 and $-0x40, %esi
199 sub %esi, %edx
200 sub %edx, %edi
201 lea 128(%ebx, %edx), %ebx
202
/* 64-byte loop.  Four aligned 16-byte chunks are loaded and kept in
   %xmm4, %xmm5, %xmm6 and %xmm7.  Their byte-wise unsigned minimum
   (pminub) contains a zero byte exactly when one of the four chunks
   does, so a single pcmpeqb/pmovmskb against the all-zero %xmm0 tests
   all 64 bytes.  Nothing is stored until both the length check and the
   NUL check have passed.  */
203L(Unaligned64Loop):
204 movaps (%esi), %xmm2
205 movaps %xmm2, %xmm4
206 movaps 16(%esi), %xmm5
207 movaps 32(%esi), %xmm3
208 movaps %xmm3, %xmm6
209 movaps 48(%esi), %xmm7
210 pminub %xmm5, %xmm2
211 pminub %xmm7, %xmm3
212 pminub %xmm2, %xmm3
213 pcmpeqb %xmm0, %xmm3
214 pmovmskb %xmm3, %edx
215 sub $64, %ebx
216 jbe L(UnalignedLeaveCase2OrCase3)
217 test %edx, %edx
218 jnz L(Unaligned64Leave)
/* Steady state: the stores of the previous 64 bytes are interleaved
   with the loads of the next 64 bytes.  */
219L(Unaligned64Loop_start):
220 add $64, %edi
221 add $64, %esi
222 movdqu %xmm4, -64(%edi)
223 movaps (%esi), %xmm2
224 movdqa %xmm2, %xmm4
225 movdqu %xmm5, -48(%edi)
226 movaps 16(%esi), %xmm5
227 pminub %xmm5, %xmm2
228 movaps 32(%esi), %xmm3
229 movdqu %xmm6, -32(%edi)
230 movaps %xmm3, %xmm6
231 movdqu %xmm7, -16(%edi)
232 movaps 48(%esi), %xmm7
233 pminub %xmm7, %xmm3
234 pminub %xmm2, %xmm3
235 pcmpeqb %xmm0, %xmm3
236 pmovmskb %xmm3, %edx
237 sub $64, %ebx
238 jbe L(UnalignedLeaveCase2OrCase3)
239 test %edx, %edx
240 jz L(Unaligned64Loop_start)
/* A NUL lies somewhere in the current 64 bytes and the length limit
   does not.  Test the four saved chunks in order to find which one
   holds it.  */
241L(Unaligned64Leave):
242 pxor %xmm1, %xmm1
243
244 pcmpeqb %xmm4, %xmm0
245 pcmpeqb %xmm5, %xmm1
246 pmovmskb %xmm0, %edx
247 pmovmskb %xmm1, %ecx
248 test %edx, %edx
249 jnz L(CopyFrom1To16BytesUnaligned_0)
250 test %ecx, %ecx
251 jnz L(CopyFrom1To16BytesUnaligned_16)
252
253 pcmpeqb %xmm6, %xmm0
254 pcmpeqb %xmm7, %xmm1
255 pmovmskb %xmm0, %edx
256 pmovmskb %xmm1, %ecx
257 test %edx, %edx
258 jnz L(CopyFrom1To16BytesUnaligned_32)
259
/* The NUL is in the fourth chunk (%xmm7): store all four chunks, put the
   NUL index within that chunk in %edx, rebase %ebx to the bytes left
   after the NUL, point %edi just past the NUL and go zero-fill.  With
   USE_AS_STPCPY, %eax is the address of the NUL.  */
260 bsf %ecx, %edx
261 movdqu %xmm4, (%edi)
262 movdqu %xmm5, 16(%edi)
263 movdqu %xmm6, 32(%edi)
264# ifdef USE_AS_STPCPY
265 lea 48(%edi, %edx), %eax
266# endif
267 movdqu %xmm7, 48(%edi)
268 add $15, %ebx
269 sub %edx, %ebx
270 lea 49(%edi, %edx), %edi
271 jmp L(StrncpyFillTailWithZero)
272
273/* The source address is already 16-byte aligned ((source & 15) == 0, so %ecx is 0 here). */
274
/* Same two-chunk prologue as for an unaligned source, but without the
   mask shift or the %ebx/%edi adjustment.  The first chunk is loaded
   into %xmm1 with an aligned load and stored once the first checks have
   passed.  */
275L(SourceStringAlignmentZero):
276 pxor %xmm0, %xmm0
277 movdqa (%esi), %xmm1
278 pcmpeqb %xmm1, %xmm0
279 pmovmskb %xmm0, %edx
280# ifdef USE_AS_STPCPY
281 cmp $16, %ebx
282 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
283# else
284 cmp $17, %ebx
285 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
286# endif
287 test %edx, %edx
288 jnz L(CopyFrom1To16BytesTail1)
289
290 pcmpeqb 16(%esi), %xmm0
291 movdqu %xmm1, (%edi)
292 pmovmskb %xmm0, %edx
293# ifdef USE_AS_STPCPY
294 cmp $32, %ebx
295 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
296# else
297 cmp $33, %ebx
298 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
299# endif
300 test %edx, %edx
301 jnz L(CopyFrom1To32Bytes1)
302
/* Neither a NUL nor the limit in the first 32 bytes: continue with the
   16-byte steps.  */
303 jmp L(Unalign16Both)
304
305/*-----------------End of main part---------------------------*/
306
307/* Case1 */
/* Case1: a NUL was found in the first one or two chunks and the length
   limit lies beyond them.  Each entry makes %esi/%edi point at the
   first byte still to be copied, leaves the remaining length in %ebx,
   converts the NUL mask in %edx to a byte index and dispatches through
   L(ExitTable).  */
308 .p2align 4
/* NUL in the first chunk of an unaligned source: undo the alignment bias
   on %ebx and %esi.  */
309L(CopyFrom1To16BytesTail):
310 sub %ecx, %ebx
311 add %ecx, %esi
312 bsf %edx, %edx
313 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
314
315 .p2align 4
/* Aligned source, NUL in the second chunk: the first 16 bytes are
   already stored, so step past them.  */
316L(CopyFrom1To32Bytes1):
317 add $16, %esi
318 add $16, %edi
319 sub $16, %ebx
/* Aligned source, NUL in the first chunk.  */
320L(CopyFrom1To16BytesTail1):
321 bsf %edx, %edx
322 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
323
324 .p2align 4
/* Unaligned source, NUL in the second aligned chunk: nothing has been
   stored yet, so the index is taken from the real start of the string
   (NUL index in the second chunk + 16 - misalignment).  */
325L(CopyFrom1To32Bytes):
326 sub %ecx, %ebx
327 bsf %edx, %edx
328 add %ecx, %esi
329 add $16, %edx
330 sub %ecx, %edx
331 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
332
/* Exits from L(Unaligned64Leave) for a NUL in the first, second or third
   16-byte chunk of the current 64-byte group (%xmm4, %xmm5, %xmm6).
   Each one stores the chunks up to and including the one with the NUL,
   sets %edx to the NUL index within that chunk, rebases %ebx to the
   bytes left after the NUL (the added constant shrinks by 16 per chunk:
   63, 47, 31), points %edi just past the NUL and jumps to the zero-fill
   code.  With USE_AS_STPCPY, %eax is the address of the NUL.  */
333 .p2align 4
334L(CopyFrom1To16BytesUnaligned_0):
335 bsf %edx, %edx
336# ifdef USE_AS_STPCPY
337 lea (%edi, %edx), %eax
338# endif
339 movdqu %xmm4, (%edi)
340 add $63, %ebx
341 sub %edx, %ebx
342 lea 1(%edi, %edx), %edi
343 jmp L(StrncpyFillTailWithZero)
344
345 .p2align 4
346L(CopyFrom1To16BytesUnaligned_16):
/* Here the mask of the second chunk is in %ecx.  */
347 bsf %ecx, %edx
348 movdqu %xmm4, (%edi)
349# ifdef USE_AS_STPCPY
350 lea 16(%edi, %edx), %eax
351# endif
352 movdqu %xmm5, 16(%edi)
353 add $47, %ebx
354 sub %edx, %ebx
355 lea 17(%edi, %edx), %edi
356 jmp L(StrncpyFillTailWithZero)
357
358 .p2align 4
359L(CopyFrom1To16BytesUnaligned_32):
360 bsf %edx, %edx
361 movdqu %xmm4, (%edi)
362 movdqu %xmm5, 16(%edi)
363# ifdef USE_AS_STPCPY
364 lea 32(%edi, %edx), %eax
365# endif
366 movdqu %xmm6, 32(%edi)
367 add $31, %ebx
368 sub %edx, %ebx
369 lea 33(%edi, %edx), %edi
370 jmp L(StrncpyFillTailWithZero)
371
/* Store the 16-byte chunk that contains the NUL (held in the named xmm
   register) at (%edi,%ecx), then continue at
   L(CopyFrom1To16BytesXmmExit), which works out what is left to
   zero-fill.  */
372 .p2align 4
373L(CopyFrom1To16BytesUnalignedXmm6):
374 movdqu %xmm6, (%edi, %ecx)
375 jmp L(CopyFrom1To16BytesXmmExit)
376
377 .p2align 4
378L(CopyFrom1To16BytesUnalignedXmm5):
379 movdqu %xmm5, (%edi, %ecx)
380 jmp L(CopyFrom1To16BytesXmmExit)
381
382 .p2align 4
383L(CopyFrom1To16BytesUnalignedXmm4):
384 movdqu %xmm4, (%edi, %ecx)
385 jmp L(CopyFrom1To16BytesXmmExit)
386
387 .p2align 4
388L(CopyFrom1To16BytesUnalignedXmm3):
389 movdqu %xmm3, (%edi, %ecx)
390 jmp L(CopyFrom1To16BytesXmmExit)
391
392 .p2align 4
393L(CopyFrom1To16BytesUnalignedXmm1):
394 movdqu %xmm1, (%edi, %ecx)
395 jmp L(CopyFrom1To16BytesXmmExit)
396
397 .p2align 4
/* Shared dispatch for the Case2 handlers: %edx already holds the byte
   index of the NUL.  */
398L(CopyFrom1To16BytesExit):
399 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
400
401/* Case2 */
/* Case2: the chunk under test contains a NUL and the length limit also
   falls within it.  After %ebx is restored to the number of bytes still
   allowed from the current position, the NUL index (%edx) is compared
   with it (unsigned):
     index <  limit: the NUL comes first, dispatch through L(ExitTable);
     index >= limit: the limit comes first, copy exactly %ebx bytes
                     through L(ExitStrncpyTable).  */
402
403 .p2align 4
/* From the 16-byte steps: %ecx is the offset of the current chunk.  */
404L(CopyFrom1To16BytesCase2):
405 add $16, %ebx
406 add %ecx, %edi
407 add %ecx, %esi
408 bsf %edx, %edx
409 cmp %ebx, %edx
410 jb L(CopyFrom1To16BytesExit)
411 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
412
413 .p2align 4
/* Unaligned source, second aligned chunk: %ecx is the misalignment, and
   the NUL index is rebased to the real start of the string.  */
414L(CopyFrom1To32BytesCase2):
415 sub %ecx, %ebx
416 add %ecx, %esi
417 bsf %edx, %edx
418 add $16, %edx
419 sub %ecx, %edx
420 cmp %ebx, %edx
421 jb L(CopyFrom1To16BytesExit)
422 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
423
/* Unaligned source, first aligned chunk.  */
424L(CopyFrom1To16BytesTailCase2):
425 sub %ecx, %ebx
426 add %ecx, %esi
427 bsf %edx, %edx
428 cmp %ebx, %edx
429 jb L(CopyFrom1To16BytesExit)
430 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
431
/* Aligned source: no pointer or length adjustment is needed.  */
432L(CopyFrom1To16BytesTail1Case2):
433 bsf %edx, %edx
434 cmp %ebx, %edx
435 jb L(CopyFrom1To16BytesExit)
436 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
437
438/* Case2 or Case3, Case3 */
/* These entries are reached when the length limit falls within the
   chunk under test.  A non-zero NUL mask (%edx) selects the matching
   Case2 handler.  Otherwise (Case3: limit reached without a NUL) the
   pointers and %ebx are adjusted in the same way as in Case2, and
   exactly %ebx bytes are copied through L(ExitStrncpyTable).  */
439
440 .p2align 4
441L(CopyFrom1To16BytesCase2OrCase3):
442 test %edx, %edx
443 jnz L(CopyFrom1To16BytesCase2)
444L(CopyFrom1To16BytesCase3):
445 add $16, %ebx
446 add %ecx, %edi
447 add %ecx, %esi
448 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
449
450 .p2align 4
451L(CopyFrom1To32BytesCase2OrCase3):
452 test %edx, %edx
453 jnz L(CopyFrom1To32BytesCase2)
454 sub %ecx, %ebx
455 add %ecx, %esi
456 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
457
458 .p2align 4
459L(CopyFrom1To16BytesTailCase2OrCase3):
460 test %edx, %edx
461 jnz L(CopyFrom1To16BytesTailCase2)
462 sub %ecx, %ebx
463 add %ecx, %esi
464 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
465
466 .p2align 4
/* Aligned source, second chunk: skip the 16 bytes already stored.  */
467L(CopyFrom1To32Bytes1Case2OrCase3):
468 add $16, %edi
469 add $16, %esi
470 sub $16, %ebx
471L(CopyFrom1To16BytesTail1Case2OrCase3):
472 test %edx, %edx
473 jnz L(CopyFrom1To16BytesTail1Case2)
474 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
475
/* Exit paths for "NUL found before the length limit".  They are reached
   through L(ExitTable) with the 0-based NUL index in %edx, so L(ExitN)
   copies N bytes from (%esi) to (%edi), the last of which is the NUL.
   Sizes that are not a single move are covered by two (sometimes
   overlapping) moves, or by storing %dh as the final NUL: %dh is zero
   because %edx holds the table index, which is below 32.
   After the copy %ebx is reduced by N and %edi advanced by N; if bytes
   remain (%ebx != 0) they are zero-filled by
   L(StrncpyFillTailWithZero).  With USE_AS_STPCPY, %eax is set to the
   address of the NUL just written.
   L(Exit0) is different: it is entry 0 of L(ExitStrncpyTable) and
   copies nothing.  */
476 .p2align 4
477L(Exit0):
478# ifdef USE_AS_STPCPY
479 mov %edi, %eax
480# endif
481 RETURN
482
483 .p2align 4
484L(Exit1):
485 movb %dh, (%edi)
486# ifdef USE_AS_STPCPY
487 lea (%edi), %eax
488# endif
489 sub $1, %ebx
/* lea does not modify the flags, so the jnz below still tests the result
   of the sub.  The same pattern is used in every L(ExitN).  */
490 lea 1(%edi), %edi
491 jnz L(StrncpyFillTailWithZero)
492 RETURN
493
494 .p2align 4
495L(Exit2):
496 movw (%esi), %dx
497 movw %dx, (%edi)
498# ifdef USE_AS_STPCPY
499 lea 1(%edi), %eax
500# endif
501 sub $2, %ebx
502 lea 2(%edi), %edi
503 jnz L(StrncpyFillTailWithZero)
504 RETURN
505
506 .p2align 4
507L(Exit3):
508 movw (%esi), %cx
509 movw %cx, (%edi)
510 movb %dh, 2(%edi)
511# ifdef USE_AS_STPCPY
512 lea 2(%edi), %eax
513# endif
514 sub $3, %ebx
515 lea 3(%edi), %edi
516 jnz L(StrncpyFillTailWithZero)
517 RETURN
518
519 .p2align 4
520L(Exit4):
521 movl (%esi), %edx
522 movl %edx, (%edi)
523# ifdef USE_AS_STPCPY
524 lea 3(%edi), %eax
525# endif
526 sub $4, %ebx
527 lea 4(%edi), %edi
528 jnz L(StrncpyFillTailWithZero)
529 RETURN
530
531 .p2align 4
532L(Exit5):
533 movl (%esi), %ecx
534 movb %dh, 4(%edi)
535 movl %ecx, (%edi)
536# ifdef USE_AS_STPCPY
537 lea 4(%edi), %eax
538# endif
539 sub $5, %ebx
540 lea 5(%edi), %edi
541 jnz L(StrncpyFillTailWithZero)
542 RETURN
543
544 .p2align 4
545L(Exit6):
546 movl (%esi), %ecx
547 movw 4(%esi), %dx
548 movl %ecx, (%edi)
549 movw %dx, 4(%edi)
550# ifdef USE_AS_STPCPY
551 lea 5(%edi), %eax
552# endif
553 sub $6, %ebx
554 lea 6(%edi), %edi
555 jnz L(StrncpyFillTailWithZero)
556 RETURN
557
558 .p2align 4
559L(Exit7):
/* Two 4-byte moves overlapping by one byte cover the 7 bytes.  */
560 movl (%esi), %ecx
561 movl 3(%esi), %edx
562 movl %ecx, (%edi)
563 movl %edx, 3(%edi)
564# ifdef USE_AS_STPCPY
565 lea 6(%edi), %eax
566# endif
567 sub $7, %ebx
568 lea 7(%edi), %edi
569 jnz L(StrncpyFillTailWithZero)
570 RETURN
571
572 .p2align 4
573L(Exit8):
574 movlpd (%esi), %xmm0
575 movlpd %xmm0, (%edi)
576# ifdef USE_AS_STPCPY
577 lea 7(%edi), %eax
578# endif
579 sub $8, %ebx
580 lea 8(%edi), %edi
581 jnz L(StrncpyFillTailWithZero)
582 RETURN
583
584 .p2align 4
585L(Exit9):
586 movlpd (%esi), %xmm0
587 movb %dh, 8(%edi)
588 movlpd %xmm0, (%edi)
589# ifdef USE_AS_STPCPY
590 lea 8(%edi), %eax
591# endif
592 sub $9, %ebx
593 lea 9(%edi), %edi
594 jnz L(StrncpyFillTailWithZero)
595 RETURN
596
597 .p2align 4
598L(Exit10):
599 movlpd (%esi), %xmm0
600 movw 8(%esi), %dx
601 movlpd %xmm0, (%edi)
602 movw %dx, 8(%edi)
603# ifdef USE_AS_STPCPY
604 lea 9(%edi), %eax
605# endif
606 sub $10, %ebx
607 lea 10(%edi), %edi
608 jnz L(StrncpyFillTailWithZero)
609 RETURN
610
611 .p2align 4
612L(Exit11):
613 movlpd (%esi), %xmm0
614 movl 7(%esi), %edx
615 movlpd %xmm0, (%edi)
616 movl %edx, 7(%edi)
617# ifdef USE_AS_STPCPY
618 lea 10(%edi), %eax
619# endif
620 sub $11, %ebx
621 lea 11(%edi), %edi
622 jnz L(StrncpyFillTailWithZero)
623 RETURN
624
625 .p2align 4
626L(Exit12):
627 movlpd (%esi), %xmm0
628 movl 8(%esi), %edx
629 movlpd %xmm0, (%edi)
630 movl %edx, 8(%edi)
631# ifdef USE_AS_STPCPY
632 lea 11(%edi), %eax
633# endif
634 sub $12, %ebx
635 lea 12(%edi), %edi
636 jnz L(StrncpyFillTailWithZero)
637 RETURN
638
639 .p2align 4
640L(Exit13):
641 movlpd (%esi), %xmm0
642 movlpd 5(%esi), %xmm1
643 movlpd %xmm0, (%edi)
644 movlpd %xmm1, 5(%edi)
645# ifdef USE_AS_STPCPY
646 lea 12(%edi), %eax
647# endif
648 sub $13, %ebx
649 lea 13(%edi), %edi
650 jnz L(StrncpyFillTailWithZero)
651 RETURN
652
653 .p2align 4
654L(Exit14):
655 movlpd (%esi), %xmm0
656 movlpd 6(%esi), %xmm1
657 movlpd %xmm0, (%edi)
658 movlpd %xmm1, 6(%edi)
659# ifdef USE_AS_STPCPY
660 lea 13(%edi), %eax
661# endif
662 sub $14, %ebx
663 lea 14(%edi), %edi
664 jnz L(StrncpyFillTailWithZero)
665 RETURN
666
667 .p2align 4
668L(Exit15):
669 movlpd (%esi), %xmm0
670 movlpd 7(%esi), %xmm1
671 movlpd %xmm0, (%edi)
672 movlpd %xmm1, 7(%edi)
673# ifdef USE_AS_STPCPY
674 lea 14(%edi), %eax
675# endif
676 sub $15, %ebx
677 lea 15(%edi), %edi
678 jnz L(StrncpyFillTailWithZero)
679 RETURN
680
681 .p2align 4
682L(Exit16):
683 movdqu (%esi), %xmm0
684 movdqu %xmm0, (%edi)
685# ifdef USE_AS_STPCPY
686 lea 15(%edi), %eax
687# endif
688 sub $16, %ebx
689 lea 16(%edi), %edi
690 jnz L(StrncpyFillTailWithZero)
691 RETURN
692
693 .p2align 4
694L(Exit17):
695 movdqu (%esi), %xmm0
696 movdqu %xmm0, (%edi)
697 movb %dh, 16(%edi)
698# ifdef USE_AS_STPCPY
699 lea 16(%edi), %eax
700# endif
701 sub $17, %ebx
702 lea 17(%edi), %edi
703 jnz L(StrncpyFillTailWithZero)
704 RETURN
705
706 .p2align 4
707L(Exit18):
708 movdqu (%esi), %xmm0
709 movw 16(%esi), %cx
710 movdqu %xmm0, (%edi)
711 movw %cx, 16(%edi)
712# ifdef USE_AS_STPCPY
713 lea 17(%edi), %eax
714# endif
715 sub $18, %ebx
716 lea 18(%edi), %edi
717 jnz L(StrncpyFillTailWithZero)
718 RETURN
719
720 .p2align 4
721L(Exit19):
722 movdqu (%esi), %xmm0
723 movl 15(%esi), %ecx
724 movdqu %xmm0, (%edi)
725 movl %ecx, 15(%edi)
726# ifdef USE_AS_STPCPY
727 lea 18(%edi), %eax
728# endif
729 sub $19, %ebx
730 lea 19(%edi), %edi
731 jnz L(StrncpyFillTailWithZero)
732 RETURN
733
734 .p2align 4
735L(Exit20):
736 movdqu (%esi), %xmm0
737 movl 16(%esi), %ecx
738 movdqu %xmm0, (%edi)
739 movl %ecx, 16(%edi)
740# ifdef USE_AS_STPCPY
741 lea 19(%edi), %eax
742# endif
743 sub $20, %ebx
744 lea 20(%edi), %edi
745 jnz L(StrncpyFillTailWithZero)
746 RETURN
747
748 .p2align 4
749L(Exit21):
750 movdqu (%esi), %xmm0
751 movl 16(%esi), %ecx
752 movdqu %xmm0, (%edi)
753 movl %ecx, 16(%edi)
754 movb %dh, 20(%edi)
755# ifdef USE_AS_STPCPY
756 lea 20(%edi), %eax
757# endif
758 sub $21, %ebx
759 lea 21(%edi), %edi
760 jnz L(StrncpyFillTailWithZero)
761 RETURN
762
763 .p2align 4
764L(Exit22):
765 movdqu (%esi), %xmm0
766 movlpd 14(%esi), %xmm3
767 movdqu %xmm0, (%edi)
768 movlpd %xmm3, 14(%edi)
769# ifdef USE_AS_STPCPY
770 lea 21(%edi), %eax
771# endif
772 sub $22, %ebx
773 lea 22(%edi), %edi
774 jnz L(StrncpyFillTailWithZero)
775 RETURN
776
777 .p2align 4
778L(Exit23):
779 movdqu (%esi), %xmm0
780 movlpd 15(%esi), %xmm3
781 movdqu %xmm0, (%edi)
782 movlpd %xmm3, 15(%edi)
783# ifdef USE_AS_STPCPY
784 lea 22(%edi), %eax
785# endif
786 sub $23, %ebx
787 lea 23(%edi), %edi
788 jnz L(StrncpyFillTailWithZero)
789 RETURN
790
791 .p2align 4
792L(Exit24):
793 movdqu (%esi), %xmm0
794 movlpd 16(%esi), %xmm2
795 movdqu %xmm0, (%edi)
796 movlpd %xmm2, 16(%edi)
797# ifdef USE_AS_STPCPY
798 lea 23(%edi), %eax
799# endif
800 sub $24, %ebx
801 lea 24(%edi), %edi
802 jnz L(StrncpyFillTailWithZero)
803 RETURN
804
805 .p2align 4
806L(Exit25):
807 movdqu (%esi), %xmm0
808 movlpd 16(%esi), %xmm2
809 movdqu %xmm0, (%edi)
810 movlpd %xmm2, 16(%edi)
811 movb %dh, 24(%edi)
812# ifdef USE_AS_STPCPY
813 lea 24(%edi), %eax
814# endif
815 sub $25, %ebx
816 lea 25(%edi), %edi
817 jnz L(StrncpyFillTailWithZero)
818 RETURN
819
820 .p2align 4
821L(Exit26):
822 movdqu (%esi), %xmm0
823 movlpd 16(%esi), %xmm2
824 movw 24(%esi), %cx
825 movdqu %xmm0, (%edi)
826 movlpd %xmm2, 16(%edi)
827 movw %cx, 24(%edi)
828# ifdef USE_AS_STPCPY
829 lea 25(%edi), %eax
830# endif
831 sub $26, %ebx
832 lea 26(%edi), %edi
833 jnz L(StrncpyFillTailWithZero)
834 RETURN
835
836 .p2align 4
837L(Exit27):
838 movdqu (%esi), %xmm0
839 movlpd 16(%esi), %xmm2
840 movl 23(%esi), %ecx
841 movdqu %xmm0, (%edi)
842 movlpd %xmm2, 16(%edi)
843 movl %ecx, 23(%edi)
844# ifdef USE_AS_STPCPY
845 lea 26(%edi), %eax
846# endif
847 sub $27, %ebx
848 lea 27(%edi), %edi
849 jnz L(StrncpyFillTailWithZero)
850 RETURN
851
852 .p2align 4
853L(Exit28):
854 movdqu (%esi), %xmm0
855 movlpd 16(%esi), %xmm2
856 movl 24(%esi), %ecx
857 movdqu %xmm0, (%edi)
858 movlpd %xmm2, 16(%edi)
859 movl %ecx, 24(%edi)
860# ifdef USE_AS_STPCPY
861 lea 27(%edi), %eax
862# endif
863 sub $28, %ebx
864 lea 28(%edi), %edi
865 jnz L(StrncpyFillTailWithZero)
866 RETURN
867
868 .p2align 4
869L(Exit29):
/* From here on, two 16-byte moves that overlap cover the N bytes.  */
870 movdqu (%esi), %xmm0
871 movdqu 13(%esi), %xmm2
872 movdqu %xmm0, (%edi)
873 movdqu %xmm2, 13(%edi)
874# ifdef USE_AS_STPCPY
875 lea 28(%edi), %eax
876# endif
877 sub $29, %ebx
878 lea 29(%edi), %edi
879 jnz L(StrncpyFillTailWithZero)
880 RETURN
881
882 .p2align 4
883L(Exit30):
884 movdqu (%esi), %xmm0
885 movdqu 14(%esi), %xmm2
886 movdqu %xmm0, (%edi)
887 movdqu %xmm2, 14(%edi)
888# ifdef USE_AS_STPCPY
889 lea 29(%edi), %eax
890# endif
891 sub $30, %ebx
892 lea 30(%edi), %edi
893 jnz L(StrncpyFillTailWithZero)
894 RETURN
895
896
897 .p2align 4
898L(Exit31):
899 movdqu (%esi), %xmm0
900 movdqu 15(%esi), %xmm2
901 movdqu %xmm0, (%edi)
902 movdqu %xmm2, 15(%edi)
903# ifdef USE_AS_STPCPY
904 lea 30(%edi), %eax
905# endif
906 sub $31, %ebx
907 lea 31(%edi), %edi
908 jnz L(StrncpyFillTailWithZero)
909 RETURN
910
911 .p2align 4
912L(Exit32):
913 movdqu (%esi), %xmm0
914 movdqu 16(%esi), %xmm2
915 movdqu %xmm0, (%edi)
916 movdqu %xmm2, 16(%edi)
917# ifdef USE_AS_STPCPY
918 lea 31(%edi), %eax
919# endif
920 sub $32, %ebx
921 lea 32(%edi), %edi
922 jnz L(StrncpyFillTailWithZero)
923 RETURN
924
/* Exit paths for "length limit reached".  They are reached through
   L(ExitStrncpyTable) with the number of bytes still allowed (%ebx) as
   index, so L(StrncpyExitN) copies exactly N bytes from (%esi) to
   (%edi) and returns.  No terminator is added and nothing is
   zero-filled.  With USE_AS_STPCPY, %eax is set to %edi + N, one past
   the last byte written.  As in the L(ExitN) paths, awkward sizes use
   two overlapping moves.
   NOTE(review): L(StrncpyExit33) sets no %eax under USE_AS_STPCPY;
   presumably it cannot be reached in that build, because the entry
   checks there compare against 16/32 rather than 17/33 -- confirm.  */
925 .p2align 4
926L(StrncpyExit1):
927 movb (%esi), %dl
928 movb %dl, (%edi)
929# ifdef USE_AS_STPCPY
930 lea 1(%edi), %eax
931# endif
932 RETURN
933
934 .p2align 4
935L(StrncpyExit2):
936 movw (%esi), %dx
937 movw %dx, (%edi)
938# ifdef USE_AS_STPCPY
939 lea 2(%edi), %eax
940# endif
941 RETURN
942
943 .p2align 4
944L(StrncpyExit3):
945 movw (%esi), %cx
946 movb 2(%esi), %dl
947 movw %cx, (%edi)
948 movb %dl, 2(%edi)
949# ifdef USE_AS_STPCPY
950 lea 3(%edi), %eax
951# endif
952 RETURN
953
954 .p2align 4
955L(StrncpyExit4):
956 movl (%esi), %edx
957 movl %edx, (%edi)
958# ifdef USE_AS_STPCPY
959 lea 4(%edi), %eax
960# endif
961 RETURN
962
963 .p2align 4
964L(StrncpyExit5):
965 movl (%esi), %ecx
966 movb 4(%esi), %dl
967 movl %ecx, (%edi)
968 movb %dl, 4(%edi)
969# ifdef USE_AS_STPCPY
970 lea 5(%edi), %eax
971# endif
972 RETURN
973
974 .p2align 4
975L(StrncpyExit6):
976 movl (%esi), %ecx
977 movw 4(%esi), %dx
978 movl %ecx, (%edi)
979 movw %dx, 4(%edi)
980# ifdef USE_AS_STPCPY
981 lea 6(%edi), %eax
982# endif
983 RETURN
984
985 .p2align 4
986L(StrncpyExit7):
987 movl (%esi), %ecx
988 movl 3(%esi), %edx
989 movl %ecx, (%edi)
990 movl %edx, 3(%edi)
991# ifdef USE_AS_STPCPY
992 lea 7(%edi), %eax
993# endif
994 RETURN
995
996 .p2align 4
997L(StrncpyExit8):
998 movlpd (%esi), %xmm0
999 movlpd %xmm0, (%edi)
1000# ifdef USE_AS_STPCPY
1001 lea 8(%edi), %eax
1002# endif
1003 RETURN
1004
1005 .p2align 4
1006L(StrncpyExit9):
1007 movlpd (%esi), %xmm0
1008 movb 8(%esi), %dl
1009 movlpd %xmm0, (%edi)
1010 movb %dl, 8(%edi)
1011# ifdef USE_AS_STPCPY
1012 lea 9(%edi), %eax
1013# endif
1014 RETURN
1015
1016 .p2align 4
1017L(StrncpyExit10):
1018 movlpd (%esi), %xmm0
1019 movw 8(%esi), %dx
1020 movlpd %xmm0, (%edi)
1021 movw %dx, 8(%edi)
1022# ifdef USE_AS_STPCPY
1023 lea 10(%edi), %eax
1024# endif
1025 RETURN
1026
1027 .p2align 4
1028L(StrncpyExit11):
1029 movlpd (%esi), %xmm0
1030 movl 7(%esi), %edx
1031 movlpd %xmm0, (%edi)
1032 movl %edx, 7(%edi)
1033# ifdef USE_AS_STPCPY
1034 lea 11(%edi), %eax
1035# endif
1036 RETURN
1037
1038 .p2align 4
1039L(StrncpyExit12):
1040 movlpd (%esi), %xmm0
1041 movl 8(%esi), %edx
1042 movlpd %xmm0, (%edi)
1043 movl %edx, 8(%edi)
1044# ifdef USE_AS_STPCPY
1045 lea 12(%edi), %eax
1046# endif
1047 RETURN
1048
1049 .p2align 4
1050L(StrncpyExit13):
1051 movlpd (%esi), %xmm0
1052 movlpd 5(%esi), %xmm1
1053 movlpd %xmm0, (%edi)
1054 movlpd %xmm1, 5(%edi)
1055# ifdef USE_AS_STPCPY
1056 lea 13(%edi), %eax
1057# endif
1058 RETURN
1059
1060 .p2align 4
1061L(StrncpyExit14):
1062 movlpd (%esi), %xmm0
1063 movlpd 6(%esi), %xmm1
1064 movlpd %xmm0, (%edi)
1065 movlpd %xmm1, 6(%edi)
1066# ifdef USE_AS_STPCPY
1067 lea 14(%edi), %eax
1068# endif
1069 RETURN
1070
1071 .p2align 4
1072L(StrncpyExit15):
1073 movlpd (%esi), %xmm0
1074 movlpd 7(%esi), %xmm1
1075 movlpd %xmm0, (%edi)
1076 movlpd %xmm1, 7(%edi)
1077# ifdef USE_AS_STPCPY
1078 lea 15(%edi), %eax
1079# endif
1080 RETURN
1081
1082 .p2align 4
1083L(StrncpyExit16):
1084 movdqu (%esi), %xmm0
1085 movdqu %xmm0, (%edi)
1086# ifdef USE_AS_STPCPY
1087 lea 16(%edi), %eax
1088# endif
1089 RETURN
1090
1091 .p2align 4
1092L(StrncpyExit17):
1093 movdqu (%esi), %xmm0
1094 movb 16(%esi), %cl
1095 movdqu %xmm0, (%edi)
1096 movb %cl, 16(%edi)
1097# ifdef USE_AS_STPCPY
1098 lea 17(%edi), %eax
1099# endif
1100 RETURN
1101
1102 .p2align 4
1103L(StrncpyExit18):
1104 movdqu (%esi), %xmm0
1105 movw 16(%esi), %cx
1106 movdqu %xmm0, (%edi)
1107 movw %cx, 16(%edi)
1108# ifdef USE_AS_STPCPY
1109 lea 18(%edi), %eax
1110# endif
1111 RETURN
1112
1113 .p2align 4
1114L(StrncpyExit19):
1115 movdqu (%esi), %xmm0
1116 movl 15(%esi), %ecx
1117 movdqu %xmm0, (%edi)
1118 movl %ecx, 15(%edi)
1119# ifdef USE_AS_STPCPY
1120 lea 19(%edi), %eax
1121# endif
1122 RETURN
1123
1124 .p2align 4
1125L(StrncpyExit20):
1126 movdqu (%esi), %xmm0
1127 movl 16(%esi), %ecx
1128 movdqu %xmm0, (%edi)
1129 movl %ecx, 16(%edi)
1130# ifdef USE_AS_STPCPY
1131 lea 20(%edi), %eax
1132# endif
1133 RETURN
1134
1135 .p2align 4
1136L(StrncpyExit21):
1137 movdqu (%esi), %xmm0
1138 movl 16(%esi), %ecx
1139 movb 20(%esi), %dl
1140 movdqu %xmm0, (%edi)
1141 movl %ecx, 16(%edi)
1142 movb %dl, 20(%edi)
1143# ifdef USE_AS_STPCPY
1144 lea 21(%edi), %eax
1145# endif
1146 RETURN
1147
1148 .p2align 4
1149L(StrncpyExit22):
1150 movdqu (%esi), %xmm0
1151 movlpd 14(%esi), %xmm3
1152 movdqu %xmm0, (%edi)
1153 movlpd %xmm3, 14(%edi)
1154# ifdef USE_AS_STPCPY
1155 lea 22(%edi), %eax
1156# endif
1157 RETURN
1158
1159 .p2align 4
1160L(StrncpyExit23):
1161 movdqu (%esi), %xmm0
1162 movlpd 15(%esi), %xmm3
1163 movdqu %xmm0, (%edi)
1164 movlpd %xmm3, 15(%edi)
1165# ifdef USE_AS_STPCPY
1166 lea 23(%edi), %eax
1167# endif
1168 RETURN
1169
1170 .p2align 4
1171L(StrncpyExit24):
1172 movdqu (%esi), %xmm0
1173 movlpd 16(%esi), %xmm2
1174 movdqu %xmm0, (%edi)
1175 movlpd %xmm2, 16(%edi)
1176# ifdef USE_AS_STPCPY
1177 lea 24(%edi), %eax
1178# endif
1179 RETURN
1180
1181 .p2align 4
1182L(StrncpyExit25):
1183 movdqu (%esi), %xmm0
1184 movlpd 16(%esi), %xmm2
1185 movb 24(%esi), %cl
1186 movdqu %xmm0, (%edi)
1187 movlpd %xmm2, 16(%edi)
1188 movb %cl, 24(%edi)
1189# ifdef USE_AS_STPCPY
1190 lea 25(%edi), %eax
1191# endif
1192 RETURN
1193
1194 .p2align 4
1195L(StrncpyExit26):
1196 movdqu (%esi), %xmm0
1197 movlpd 16(%esi), %xmm2
1198 movw 24(%esi), %cx
1199 movdqu %xmm0, (%edi)
1200 movlpd %xmm2, 16(%edi)
1201 movw %cx, 24(%edi)
1202# ifdef USE_AS_STPCPY
1203 lea 26(%edi), %eax
1204# endif
1205 RETURN
1206
1207 .p2align 4
1208L(StrncpyExit27):
1209 movdqu (%esi), %xmm0
1210 movlpd 16(%esi), %xmm2
1211 movl 23(%esi), %ecx
1212 movdqu %xmm0, (%edi)
1213 movlpd %xmm2, 16(%edi)
1214 movl %ecx, 23(%edi)
1215# ifdef USE_AS_STPCPY
1216 lea 27(%edi), %eax
1217# endif
1218 RETURN
1219
1220 .p2align 4
1221L(StrncpyExit28):
1222 movdqu (%esi), %xmm0
1223 movlpd 16(%esi), %xmm2
1224 movl 24(%esi), %ecx
1225 movdqu %xmm0, (%edi)
1226 movlpd %xmm2, 16(%edi)
1227 movl %ecx, 24(%edi)
1228# ifdef USE_AS_STPCPY
1229 lea 28(%edi), %eax
1230# endif
1231 RETURN
1232
1233 .p2align 4
1234L(StrncpyExit29):
1235 movdqu (%esi), %xmm0
1236 movdqu 13(%esi), %xmm2
1237 movdqu %xmm0, (%edi)
1238 movdqu %xmm2, 13(%edi)
1239# ifdef USE_AS_STPCPY
1240 lea 29(%edi), %eax
1241# endif
1242 RETURN
1243
1244 .p2align 4
1245L(StrncpyExit30):
1246 movdqu (%esi), %xmm0
1247 movdqu 14(%esi), %xmm2
1248 movdqu %xmm0, (%edi)
1249 movdqu %xmm2, 14(%edi)
1250# ifdef USE_AS_STPCPY
1251 lea 30(%edi), %eax
1252# endif
1253 RETURN
1254
1255 .p2align 4
1256L(StrncpyExit31):
1257 movdqu (%esi), %xmm0
1258 movdqu 15(%esi), %xmm2
1259 movdqu %xmm0, (%edi)
1260 movdqu %xmm2, 15(%edi)
1261# ifdef USE_AS_STPCPY
1262 lea 31(%edi), %eax
1263# endif
1264 RETURN
1265
1266 .p2align 4
1267L(StrncpyExit32):
1268 movdqu (%esi), %xmm0
1269 movdqu 16(%esi), %xmm2
1270 movdqu %xmm0, (%edi)
1271 movdqu %xmm2, 16(%edi)
1272# ifdef USE_AS_STPCPY
1273 lea 32(%edi), %eax
1274# endif
1275 RETURN
1276
1277 .p2align 4
1278L(StrncpyExit33):
1279 movdqu (%esi), %xmm0
1280 movdqu 16(%esi), %xmm2
1281 movb 32(%esi), %cl
1282 movdqu %xmm0, (%edi)
1283 movdqu %xmm2, 16(%edi)
1284 movb %cl, 32(%edi)
1285 RETURN
1286
/* Tail of the zero fill: L(FillN) makes the last N bytes at (%edi) zero
   and returns.  These labels are dispatched through L(FillTable) with
   %ebx as index (the table is not in this part of the file; presumably
   entry N is L(FillN)).  On entry %xmm0 and %edx are zero, as set at
   the top of L(StrncpyFillTailWithZero).
   L(Fill3), L(Fill7) and L(Fill15) use one 4-, 8- or 16-byte store that
   starts at -1(%edi).  That rewrites the byte just before %edi, which
   the preceding code has already written as zero (the terminator or an
   earlier fill store).  */
1287 .p2align 4
1288L(Fill0):
1289 RETURN
1290
1291 .p2align 4
1292L(Fill1):
1293 movb %dl, (%edi)
1294 RETURN
1295
1296 .p2align 4
1297L(Fill2):
1298 movw %dx, (%edi)
1299 RETURN
1300
1301 .p2align 4
1302L(Fill3):
1303 movl %edx, -1(%edi)
1304 RETURN
1305
1306 .p2align 4
1307L(Fill4):
1308 movl %edx, (%edi)
1309 RETURN
1310
1311 .p2align 4
1312L(Fill5):
1313 movl %edx, (%edi)
1314 movb %dl, 4(%edi)
1315 RETURN
1316
1317 .p2align 4
1318L(Fill6):
1319 movl %edx, (%edi)
1320 movw %dx, 4(%edi)
1321 RETURN
1322
1323 .p2align 4
1324L(Fill7):
1325 movlpd %xmm0, -1(%edi)
1326 RETURN
1327
1328 .p2align 4
1329L(Fill8):
1330 movlpd %xmm0, (%edi)
1331 RETURN
1332
1333 .p2align 4
1334L(Fill9):
1335 movlpd %xmm0, (%edi)
1336 movb %dl, 8(%edi)
1337 RETURN
1338
1339 .p2align 4
1340L(Fill10):
1341 movlpd %xmm0, (%edi)
1342 movw %dx, 8(%edi)
1343 RETURN
1344
1345 .p2align 4
1346L(Fill11):
1347 movlpd %xmm0, (%edi)
1348 movl %edx, 7(%edi)
1349 RETURN
1350
1351 .p2align 4
1352L(Fill12):
1353 movlpd %xmm0, (%edi)
1354 movl %edx, 8(%edi)
1355 RETURN
1356
1357 .p2align 4
1358L(Fill13):
1359 movlpd %xmm0, (%edi)
1360 movlpd %xmm0, 5(%edi)
1361 RETURN
1362
1363 .p2align 4
1364L(Fill14):
1365 movlpd %xmm0, (%edi)
1366 movlpd %xmm0, 6(%edi)
1367 RETURN
1368
1369 .p2align 4
1370L(Fill15):
1371 movdqu %xmm0, -1(%edi)
1372 RETURN
1373
1374 .p2align 4
1375L(Fill16):
1376 movdqu %xmm0, (%edi)
1377 RETURN
1378
/* L(CopyFrom1To16BytesUnalignedXmm2) stores the chunk held in %xmm2 at
   (%edi,%ecx) and falls through into L(CopyFrom1To16BytesXmmExit).  */
1379 .p2align 4
1380L(CopyFrom1To16BytesUnalignedXmm2):
1381 movdqu %xmm2, (%edi, %ecx)
1382
/* Common tail once the 16-byte chunk that contains the NUL has been
   stored at (%edi,%ecx).  %edx is turned from the NUL mask into the NUL
   index, %ebx is rebased to the bytes left after the NUL (add 15,
   subtract the index), %edi is moved just past the NUL, and with
   USE_AS_STPCPY %eax is set to the address of the NUL.  There is no
   jump at the end: execution continues into the code that follows this
   block.  */
1383 .p2align 4
1384L(CopyFrom1To16BytesXmmExit):
1385 bsf %edx, %edx
1386 add $15, %ebx
1387 add %ecx, %edi
1388# ifdef USE_AS_STPCPY
1389 lea (%edi, %edx), %eax
1390# endif
1391 sub %edx, %ebx
1392 lea 1(%edi, %edx), %edi
1393
/* Zero-fill the rest of the destination after the terminator.
   On entry %edi points at the first byte to clear and %ebx is the
   number of bytes to clear.  %xmm0 and %edx are zeroed here for use by
   the stores below and by the L(FillN) tails.  %esi is reused as
   scratch, since the source is no longer needed.  */
1394 .p2align 4
1395L(StrncpyFillTailWithZero):
1396 pxor %xmm0, %xmm0
1397 xor %edx, %edx
/* 16 bytes or fewer: go straight to the table-driven tail.  */
1398 sub $16, %ebx
1399 jbe L(StrncpyFillExit)
1400
/* Clear 16 bytes with one unaligned store, then round %edi down to a
   16-byte boundary so that the following stores can be aligned.  %ebx is
   increased by the same amount, so the few bytes cleared twice are
   accounted for.  */
1401 movdqu %xmm0, (%edi)
1402 add $16, %edi
1403
1404 mov %edi, %esi
1405 and $0xf, %esi
1406 sub %esi, %edi
1407 add %esi, %ebx
1408 sub $64, %ebx
1409 jb L(StrncpyFillLess64)
1410
/* Clear 64 bytes per iteration with aligned stores.  */
1411L(StrncpyFillLoopMovdqa):
1412 movdqa %xmm0, (%edi)
1413 movdqa %xmm0, 16(%edi)
1414 movdqa %xmm0, 32(%edi)
1415 movdqa %xmm0, 48(%edi)
1416 add $64, %edi
1417 sub $64, %ebx
1418 jae L(StrncpyFillLoopMovdqa)
1419
/* Fewer than 64 bytes left.  %ebx is negative-biased at this point and
   is stepped back up by 32 and then 16 (signed tests) to decide whether
   one more 32-byte and/or 16-byte aligned store fits.  The final 0..16
   bytes go through L(FillTable).  */
1420L(StrncpyFillLess64):
1421 add $32, %ebx
1422 jl L(StrncpyFillLess32)
1423 movdqa %xmm0, (%edi)
1424 movdqa %xmm0, 16(%edi)
1425 add $32, %edi
1426 sub $16, %ebx
1427 jl L(StrncpyFillExit)
1428 movdqa %xmm0, (%edi)
1429 add $16, %edi
1430 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1431
1432L(StrncpyFillLess32):
1433 add $16, %ebx
1434 jl L(StrncpyFillExit)
1435 movdqa %xmm0, (%edi)
1436 add $16, %edi
1437 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1438
/* Undo the last bias of 16 so that %ebx is the exact tail length used as
   the table index.  */
1439L(StrncpyFillExit):
1440 add $16, %ebx
1441 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1442
/* Leaving the 64-byte loop because the length limit falls inside the
   current 64 bytes.  The four chunks are still in %xmm4..%xmm7 and have
   not been stored.  %edx is the combined NUL mask: non-zero means a NUL
   is also present (Case2), zero means only the limit ends the copy
   (Case3).  */
1443 .p2align 4
1444L(UnalignedLeaveCase2OrCase3):
1445 test %edx, %edx
1446 jnz L(Unaligned64LeaveCase2)
/* Case3: %ecx = (%ebx + 64) rounded down to a multiple of 16, the offset
   of the chunk in which the limit falls.  Whole chunks before it are
   stored one at a time; the partial chunk is handed to
   L(CopyFrom1To16BytesCase3).  If all 64 bytes fit exactly, the fourth
   chunk is stored and the function returns (with USE_AS_STPCPY, %eax is
   %edi + 64).  */
1447L(Unaligned64LeaveCase3):
1448 lea 64(%ebx), %ecx
1449 and $-16, %ecx
1450 add $48, %ebx
1451 jl L(CopyFrom1To16BytesCase3)
1452 movdqu %xmm4, (%edi)
1453 sub $16, %ebx
1454 jb L(CopyFrom1To16BytesCase3)
1455 movdqu %xmm5, 16(%edi)
1456 sub $16, %ebx
1457 jb L(CopyFrom1To16BytesCase3)
1458 movdqu %xmm6, 32(%edi)
1459 sub $16, %ebx
1460 jb L(CopyFrom1To16BytesCase3)
1461 movdqu %xmm7, 48(%edi)
1462# ifdef USE_AS_STPCPY
1463 lea 64(%edi), %eax
1464# endif
1465 RETURN
1466
/* Case2: walk the four chunks in order.  %ecx tracks the offset of the
   chunk being tested.  For each chunk: if the limit falls in it, let
   the generic Case2/Case3 code decide; if it holds the NUL, store it
   and zero-fill; otherwise store it and move to the next chunk.  */
1467 .p2align 4
1468L(Unaligned64LeaveCase2):
1469 xor %ecx, %ecx
1470 pcmpeqb %xmm4, %xmm0
1471 pmovmskb %xmm0, %edx
1472 add $48, %ebx
1473 jle L(CopyFrom1To16BytesCase2OrCase3)
1474 test %edx, %edx
1475 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1476
1477 pcmpeqb %xmm5, %xmm0
1478 pmovmskb %xmm0, %edx
1479 movdqu %xmm4, (%edi)
1480 add $16, %ecx
1481 sub $16, %ebx
1482 jbe L(CopyFrom1To16BytesCase2OrCase3)
1483 test %edx, %edx
1484 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1485
1486 pcmpeqb %xmm6, %xmm0
1487 pmovmskb %xmm0, %edx
1488 movdqu %xmm5, 16(%edi)
1489 add $16, %ecx
1490 sub $16, %ebx
1491 jbe L(CopyFrom1To16BytesCase2OrCase3)
1492 test %edx, %edx
1493 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1494
/* Fourth chunk: both the NUL and the limit are in it.  Point %edi/%esi
   at its start and choose, as in the other Case2 handlers, between the
   NUL-first and the limit-first exit.  */
1495 pcmpeqb %xmm7, %xmm0
1496 pmovmskb %xmm0, %edx
1497 movdqu %xmm6, 32(%edi)
1498 lea 16(%edi, %ecx), %edi
1499 lea 16(%esi, %ecx), %esi
1500 bsf %edx, %edx
1501 cmp %ebx, %edx
1502 jb L(CopyFrom1To16BytesExit)
1503 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
1504
/* L(ExitZero): return the current value of %edi in %eax without copying
   anything, then close the USE_AS_STRNCPY variant of STRCPY.
   NOTE(review): presumably the zero-length case with %edi still holding the
   destination pointer; the jump here is outside this view - confirm.  */
1505 .p2align 4
1506L(ExitZero):
1507 movl %edi, %eax
1508 RETURN
1509
1510END (STRCPY)
1511
/* L(ExitTable): read-only jump table with 32 entries.  Entry i (0-based)
   refers to L(Exit<i+1>), encoded with JMPTBL relative to the table base
   L(ExitTable) (under PIC, JMPTBL (I, B) is I - B).  Each entry is one
   .int, matching the scale of 4 used by BRANCH_TO_JMPTBL_ENTRY.
   NOTE(review): the .p2align 4 below is issued before the switch to
   .rodata, so it pads the section that is current after END (STRCPY)
   rather than aligning this table - confirm whether that is intended.  */
1512 .p2align 4
1513 .section .rodata
1514L(ExitTable):
1515 .int JMPTBL(L(Exit1), L(ExitTable))
1516 .int JMPTBL(L(Exit2), L(ExitTable))
1517 .int JMPTBL(L(Exit3), L(ExitTable))
1518 .int JMPTBL(L(Exit4), L(ExitTable))
1519 .int JMPTBL(L(Exit5), L(ExitTable))
1520 .int JMPTBL(L(Exit6), L(ExitTable))
1521 .int JMPTBL(L(Exit7), L(ExitTable))
1522 .int JMPTBL(L(Exit8), L(ExitTable))
1523 .int JMPTBL(L(Exit9), L(ExitTable))
1524 .int JMPTBL(L(Exit10), L(ExitTable))
1525 .int JMPTBL(L(Exit11), L(ExitTable))
1526 .int JMPTBL(L(Exit12), L(ExitTable))
1527 .int JMPTBL(L(Exit13), L(ExitTable))
1528 .int JMPTBL(L(Exit14), L(ExitTable))
1529 .int JMPTBL(L(Exit15), L(ExitTable))
1530 .int JMPTBL(L(Exit16), L(ExitTable))
1531 .int JMPTBL(L(Exit17), L(ExitTable))
1532 .int JMPTBL(L(Exit18), L(ExitTable))
1533 .int JMPTBL(L(Exit19), L(ExitTable))
1534 .int JMPTBL(L(Exit20), L(ExitTable))
1535 .int JMPTBL(L(Exit21), L(ExitTable))
1536 .int JMPTBL(L(Exit22), L(ExitTable))
1537 .int JMPTBL(L(Exit23), L(ExitTable))
1538 .int JMPTBL(L(Exit24), L(ExitTable))
1539 .int JMPTBL(L(Exit25), L(ExitTable))
1540 .int JMPTBL(L(Exit26), L(ExitTable))
1541 .int JMPTBL(L(Exit27), L(ExitTable))
1542 .int JMPTBL(L(Exit28), L(ExitTable))
1543 .int JMPTBL(L(Exit29), L(ExitTable))
1544 .int JMPTBL(L(Exit30), L(ExitTable))
1545 .int JMPTBL(L(Exit31), L(ExitTable))
1546 .int JMPTBL(L(Exit32), L(ExitTable))
1547
/* L(ExitStrncpyTable): jump table with 34 entries.  Entry 0 refers to
   L(Exit0) and entry n (1..33) to L(StrncpyExit<n>), each encoded with
   JMPTBL relative to L(ExitStrncpyTable).  It is indexed by %ebx with
   scale 4, for example at the end of L(Unaligned64LeaveCase2).
   NOTE(review): the index is presumably the number of bytes still to be
   copied - confirm against the L(StrncpyExit<n>) targets.  */
1548L(ExitStrncpyTable):
1549 .int JMPTBL(L(Exit0), L(ExitStrncpyTable))
1550 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1551 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1552 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1553 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1554 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1555 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1556 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1557 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1558 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1559 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1560 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1561 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1562 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1563 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1564 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1565 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1566 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1567 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1568 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1569 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1570 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1571 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1572 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1573 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1574 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1575 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1576 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1577 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1578 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1579 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1580 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1581 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1582 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1583
/* L(FillTable): 16-byte aligned jump table with 17 entries.  Entry n
   (0..16) refers to L(Fill<n>), encoded with JMPTBL relative to
   L(FillTable).  The L(StrncpyFill*) paths index it with %ebx, scale 4.
   NOTE(review): L(Fill<n>) presumably stores the last n fill bytes; the
   targets are outside this view - confirm.  */
1584 .p2align 4
1585L(FillTable):
1586 .int JMPTBL(L(Fill0), L(FillTable))
1587 .int JMPTBL(L(Fill1), L(FillTable))
1588 .int JMPTBL(L(Fill2), L(FillTable))
1589 .int JMPTBL(L(Fill3), L(FillTable))
1590 .int JMPTBL(L(Fill4), L(FillTable))
1591 .int JMPTBL(L(Fill5), L(FillTable))
1592 .int JMPTBL(L(Fill6), L(FillTable))
1593 .int JMPTBL(L(Fill7), L(FillTable))
1594 .int JMPTBL(L(Fill8), L(FillTable))
1595 .int JMPTBL(L(Fill9), L(FillTable))
1596 .int JMPTBL(L(Fill10), L(FillTable))
1597 .int JMPTBL(L(Fill11), L(FillTable))
1598 .int JMPTBL(L(Fill12), L(FillTable))
1599 .int JMPTBL(L(Fill13), L(FillTable))
1600 .int JMPTBL(L(Fill14), L(FillTable))
1601 .int JMPTBL(L(Fill15), L(FillTable))
1602 .int JMPTBL(L(Fill16), L(FillTable))
1603# else
/* Configuration of the variant built when USE_AS_STRNCPY is not defined.
   PARMS is 4 and ENTRANCE is empty, so STR1 and STR2 are 4(%esp) and
   8(%esp) as seen at function entry, before anything is pushed.  */
1604# define PARMS 4
1605# define ENTRANCE
/* RETURN is for paths that run after %edi was pushed: it pops %edi and
   returns.  The CFI_PUSH placed after the ret emits no instruction; it
   re-establishes the "edi is on the stack" unwind state for the code that
   follows the macro textually.  */
1606# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
/* RETURN1 is a bare ret, for paths that leave before any register is
   pushed.  */
1607# define RETURN1 ret
1608
/* STRCPY, variant without USE_AS_STRNCPY: copy the NUL-terminated string
   from the second stack argument to the first.
   In:    STR1(%esp) -> %edx, destination pointer
          STR2(%esp) -> %ecx, source pointer
   Out:   %eax = destination pointer, or under USE_AS_STPCPY the address of
          the copied terminating NUL (set by the exit path that is taken).
   This first part checks the first 16 source bytes one at a time.  When
   source byte n-1 is the first NUL it jumps to L(ExitTail<n>), which copies
   exactly n bytes; nothing has been pushed at that point.  Falling through
   means the first 16 bytes contain no NUL.  */
1609 .text
1610ENTRY (STRCPY)
1611 ENTRANCE
1612 mov STR1(%esp), %edx
1613 mov STR2(%esp), %ecx
1614
1615 cmpb $0, (%ecx)
1616 jz L(ExitTail1)
1617 cmpb $0, 1(%ecx)
1618 jz L(ExitTail2)
1619 cmpb $0, 2(%ecx)
1620 jz L(ExitTail3)
1621 cmpb $0, 3(%ecx)
1622 jz L(ExitTail4)
1623 cmpb $0, 4(%ecx)
1624 jz L(ExitTail5)
1625 cmpb $0, 5(%ecx)
1626 jz L(ExitTail6)
1627 cmpb $0, 6(%ecx)
1628 jz L(ExitTail7)
1629 cmpb $0, 7(%ecx)
1630 jz L(ExitTail8)
1631 cmpb $0, 8(%ecx)
1632 jz L(ExitTail9)
1633 cmpb $0, 9(%ecx)
1634 jz L(ExitTail10)
1635 cmpb $0, 10(%ecx)
1636 jz L(ExitTail11)
1637 cmpb $0, 11(%ecx)
1638 jz L(ExitTail12)
1639 cmpb $0, 12(%ecx)
1640 jz L(ExitTail13)
1641 cmpb $0, 13(%ecx)
1642 jz L(ExitTail14)
1643 cmpb $0, 14(%ecx)
1644 jz L(ExitTail15)
1645 cmpb $0, 15(%ecx)
1646 jz L(ExitTail16)
1647
/* Main path of STRCPY, reached when the first 16 source bytes hold no NUL.
   Register roles from here to the exits:
     %ecx  source cursor (16-byte aligned after the first step)
     %edx  destination cursor, kept at the same distance from %ecx
     %ebx  byte offset added to both cursors by L(CopyFrom1To16Bytes)
     %edi  original destination, used for the return value
     %xmm0 all zeros whenever a compare is about to be issued
     %eax  scratch and pmovmskb result (bit i set = byte i is NUL)
   %edi and %ebx are pushed here; L(CopyFrom1To16Bytes) pops %ebx and the
   RETURN macro pops %edi.  */
1648 PUSH (%edi)
1649 PUSH (%ebx)
1650
1651 mov %edx, %edi
	/* %ebx = first 16-byte boundary above the source, in (src, src + 16].  */
1652 lea 16(%ecx), %ebx
1653 and $-16, %ebx
1654 pxor %xmm0, %xmm0
	/* Copy the first 16 bytes, already known to be NUL-free.  */
1655 movdqu (%ecx), %xmm1
1656 movdqu %xmm1, (%edx)
	/* Look for a NUL in the aligned 16 bytes at %ebx.  */
1657 pcmpeqb (%ebx), %xmm0
1658 pmovmskb %xmm0, %eax
	/* Turn %ebx into the distance (1..16) from the source to that block.  */
1659 sub %ecx, %ebx
1660 test %eax, %eax
1661 jnz L(CopyFrom1To16Bytes)
1662
	/* Move %ecx up to the aligned block and advance %edx by the same
	   distance (%eax holds old - new, a negative value that is subtracted).
	   %ebx restarts at 0.  */
1663 mov %ecx, %eax
1664 lea 16(%ecx), %ecx
1665 and $-16, %ecx
1666 sub %ecx, %eax
1667 sub %eax, %edx
1668 xor %ebx, %ebx
1669
	/* Six unrolled steps.  Each one loads the next aligned 16 bytes, stores
	   the previous chunk, tests the new chunk for a NUL and adds 16 to
	   %ebx, so on a hit %ecx + %ebx addresses the chunk holding the NUL.
	   %xmm0 stays all zeros because every earlier compare matched nothing.  */
1670 .p2align 4
1671 movdqa (%ecx), %xmm1
1672 movaps 16(%ecx), %xmm2
1673 movdqu %xmm1, (%edx)
1674 pcmpeqb %xmm2, %xmm0
1675 pmovmskb %xmm0, %eax
1676 add $16, %ebx
1677 test %eax, %eax
1678 jnz L(CopyFrom1To16Bytes)
1679
1680 movaps 16(%ecx, %ebx), %xmm3
1681 movdqu %xmm2, (%edx, %ebx)
1682 pcmpeqb %xmm3, %xmm0
1683 pmovmskb %xmm0, %eax
1684 add $16, %ebx
1685 test %eax, %eax
1686 jnz L(CopyFrom1To16Bytes)
1687
1688 movaps 16(%ecx, %ebx), %xmm4
1689 movdqu %xmm3, (%edx, %ebx)
1690 pcmpeqb %xmm4, %xmm0
1691 pmovmskb %xmm0, %eax
1692 add $16, %ebx
1693 test %eax, %eax
1694 jnz L(CopyFrom1To16Bytes)
1695
1696 movaps 16(%ecx, %ebx), %xmm1
1697 movdqu %xmm4, (%edx, %ebx)
1698 pcmpeqb %xmm1, %xmm0
1699 pmovmskb %xmm0, %eax
1700 add $16, %ebx
1701 test %eax, %eax
1702 jnz L(CopyFrom1To16Bytes)
1703
1704 movaps 16(%ecx, %ebx), %xmm2
1705 movdqu %xmm1, (%edx, %ebx)
1706 pcmpeqb %xmm2, %xmm0
1707 pmovmskb %xmm0, %eax
1708 add $16, %ebx
1709 test %eax, %eax
1710 jnz L(CopyFrom1To16Bytes)
1711
1712 movaps 16(%ecx, %ebx), %xmm3
1713 movdqu %xmm2, (%edx, %ebx)
1714 pcmpeqb %xmm3, %xmm0
1715 pmovmskb %xmm0, %eax
1716 add $16, %ebx
1717 test %eax, %eax
1718 jnz L(CopyFrom1To16Bytes)
1719
	/* No NUL so far (%ebx is now 0x60).  Store the last chunk, then round
	   the source cursor down to a 64-byte boundary for the main loop and
	   move %edx by the same amount.  Rounding down can step back over as
	   much as 48 bytes that were already copied; the loop copies them
	   again.  */
1720 movdqu %xmm3, (%edx, %ebx)
1721 mov %ecx, %eax
1722 lea 16(%ecx, %ebx), %ecx
1723 and $-0x40, %ecx
1724 sub %ecx, %eax
1725 sub %eax, %edx
1726
/* L(Aligned64Loop): main loop, 64 source bytes per iteration, with %ecx
   64-byte aligned.  The four 16-byte pieces are kept in %xmm4, %xmm5,
   %xmm6 and %xmm7; %xmm2 and %xmm3 are working copies that pminub folds
   into the byte-wise unsigned minimum of all four pieces, which has a zero
   byte exactly when some piece has one.  Both cursors are advanced by 64
   before the test, so the block under test lives at -64(%ecx) and is to be
   stored at -64(%edx).  Nothing is stored until the block is known to be
   NUL-free; a block containing a NUL goes to L(Aligned64Leave).  */
1727L(Aligned64Loop):
1728 movaps (%ecx), %xmm2
1729 movaps %xmm2, %xmm4
1730 movaps 16(%ecx), %xmm5
1731 movaps 32(%ecx), %xmm3
1732 movaps %xmm3, %xmm6
1733 movaps 48(%ecx), %xmm7
1734 pminub %xmm5, %xmm2
1735 add $64, %ecx
1736 pminub %xmm7, %xmm3
1737 add $64, %edx
1738 pminub %xmm2, %xmm3
	/* First pass: the compare result lands in %xmm3, %xmm0 stays zero.  */
1739 pcmpeqb %xmm0, %xmm3
1740 pmovmskb %xmm3, %eax
1741 test %eax, %eax
1742 jnz L(Aligned64Leave)
	/* Steady state: stores of the previous (NUL-free) block are interleaved
	   with the loads of the next one.  */
1743L(Aligned64Loop_start):
1744 movdqu %xmm4, -64(%edx)
1745 movaps (%ecx), %xmm2
1746 movdqa %xmm2, %xmm4
1747 movdqu %xmm5, -48(%edx)
1748 movaps 16(%ecx), %xmm5
1749 pminub %xmm5, %xmm2
1750 movaps 32(%ecx), %xmm3
1751 movdqu %xmm6, -32(%edx)
1752 movaps %xmm3, %xmm6
1753 movdqu %xmm7, -16(%edx)
1754 movaps 48(%ecx), %xmm7
1755 pminub %xmm7, %xmm3
1756 pminub %xmm2, %xmm3
	/* Here the result is written to %xmm0.  It is all zeros whenever the
	   loop continues; L(Aligned64Leave) clears it again on exit.  */
1757 pcmpeqb %xmm3, %xmm0
1758 pmovmskb %xmm0, %eax
1759 add $64, %edx
1760 add $64, %ecx
1761 test %eax, %eax
1762 jz L(Aligned64Loop_start)
/* L(Aligned64Leave): the 64-byte block held in %xmm4..%xmm7 contains a NUL
   and has not been stored yet.  Find the 16-byte piece with the NUL, store
   the pieces before it, and continue in L(CopyFrom1To16Bytes) with
     %eax = NUL mask of that piece
     %ebx = offset of that piece relative to the (already advanced) cursors.
   %ebx arrives as 0x60, left over from the six unrolled steps before the
   loop, so subtracting 0xa0 gives -0x40, the start of the block; it then
   grows by 16 per piece.  */
1763L(Aligned64Leave):
1764 sub $0xa0, %ebx
1765 pxor %xmm0, %xmm0
1766 pcmpeqb %xmm4, %xmm0
1767 pmovmskb %xmm0, %eax
1768 test %eax, %eax
1769 jnz L(CopyFrom1To16Bytes)
1770
1771 pcmpeqb %xmm5, %xmm0
1772 pmovmskb %xmm0, %eax
1773 movdqu %xmm4, -64(%edx)
1774 test %eax, %eax
	/* lea does not change the flags, so jnz still sees the test result.  */
1775 lea 16(%ebx), %ebx
1776 jnz L(CopyFrom1To16Bytes)
1777
1778 pcmpeqb %xmm6, %xmm0
1779 pmovmskb %xmm0, %eax
1780 movdqu %xmm5, -48(%edx)
1781 test %eax, %eax
1782 lea 16(%ebx), %ebx
1783 jnz L(CopyFrom1To16Bytes)
1784
	/* The first three pieces are NUL-free, so the NUL is in %xmm7: no test
	   is made, control falls through into L(CopyFrom1To16Bytes).  */
1785 movdqu %xmm6, -32(%edx)
1786 pcmpeqb %xmm7, %xmm0
1787 pmovmskb %xmm0, %eax
1788 lea 16(%ebx), %ebx
1789
1790/*-----------------End of main part---------------------------*/
1791
/* L(CopyFrom1To16Bytes): copy the last 1..16 bytes, up to and including
   the terminating NUL.
   In:  %eax  16-bit pmovmskb mask, bit i set when byte i of the final
              16-byte piece is NUL
        %ebx  offset added to %ecx and %edx so that both address that piece
        stack: saved %ebx on top, saved %edi below it
   The saved %ebx is popped here.  The lowest set mask bit selects the
   exit: bits 0..6 go to L(Exit1)..L(Exit7), bit 7 is handled inline as the
   8-byte case, and an all-zero low byte goes to L(ExitHigh).  */
1792 .p2align 4
1793L(CopyFrom1To16Bytes):
1794 add %ebx, %edx
1795 add %ebx, %ecx
1796
1797 POP (%ebx)
1798 test %al, %al
1799 jz L(ExitHigh)
1800 test $0x01, %al
1801 jnz L(Exit1)
1802 test $0x02, %al
1803 jnz L(Exit2)
1804 test $0x04, %al
1805 jnz L(Exit3)
1806 test $0x08, %al
1807 jnz L(Exit4)
1808 test $0x10, %al
1809 jnz L(Exit5)
1810 test $0x20, %al
1811 jnz L(Exit6)
1812 test $0x40, %al
1813 jnz L(Exit7)
	/* %al is nonzero with bits 0..6 clear, so the NUL is byte 7: copy
	   8 bytes as two 4-byte moves.  */
1814 /* Exit 8 */
1815 movl (%ecx), %eax
1816 movl %eax, (%edx)
1817 movl 4(%ecx), %eax
1818 movl %eax, 4(%edx)
1819# ifdef USE_AS_STPCPY
	/* Address of the copied NUL.  */
1820 lea 7(%edx), %eax
1821# else
	/* Original destination pointer.  */
1822 movl %edi, %eax
1823# endif
1824 RETURN
1825
/* L(ExitHigh): entered from L(CopyFrom1To16Bytes) when the low mask byte
   is zero, so the NUL is one of bytes 8..15 and its bit is in %ah.  Bits
   0..6 of %ah go to L(Exit9)..L(Exit15); otherwise the NUL is byte 15 and
   all 16 bytes are copied inline with two 8-byte movlpd moves.
   Returns %edi (original destination) or, under USE_AS_STPCPY, the address
   of the copied NUL.  */
1826 .p2align 4
1827L(ExitHigh):
1828 test $0x01, %ah
1829 jnz L(Exit9)
1830 test $0x02, %ah
1831 jnz L(Exit10)
1832 test $0x04, %ah
1833 jnz L(Exit11)
1834 test $0x08, %ah
1835 jnz L(Exit12)
1836 test $0x10, %ah
1837 jnz L(Exit13)
1838 test $0x20, %ah
1839 jnz L(Exit14)
1840 test $0x40, %ah
1841 jnz L(Exit15)
1842 /* Exit 16 */
1843 movlpd (%ecx), %xmm0
1844 movlpd %xmm0, (%edx)
1845 movlpd 8(%ecx), %xmm0
1846 movlpd %xmm0, 8(%edx)
1847# ifdef USE_AS_STPCPY
1848 lea 15(%edx), %eax
1849# else
1850 movl %edi, %eax
1851# endif
1852 RETURN
1853
/* L(Exit1): the byte at (%ecx) is the terminating NUL.  Copy that single
   byte to (%edx) and return %edi (the original destination) or, under
   USE_AS_STPCPY, %edx itself, which is the address of the copied NUL.  */
1854 .p2align 4
1855L(Exit1):
1856 movb (%ecx), %al
1857 movb %al, (%edx)
1858# ifdef USE_AS_STPCPY
1859 lea (%edx), %eax
1860# else
1861 movl %edi, %eax
1862# endif
1863 RETURN
1864
/* L(Exit2): the NUL is byte 1 of the piece at (%ecx).  Copy 2 bytes with
   one 16-bit move and return %edi (the original destination) or, under
   USE_AS_STPCPY, the address of the copied NUL.  */
1865 .p2align 4
1866L(Exit2):
1867 movw (%ecx), %ax
1868 movw %ax, (%edx)
1869# ifdef USE_AS_STPCPY
1870 lea 1(%edx), %eax
1871# else
1872 movl %edi, %eax
1873# endif
1874 RETURN
1875
/* L(Exit3): the NUL is byte 2 of the piece at (%ecx).  Copy 3 bytes as a
   16-bit move plus a byte move and return %edi (the original destination)
   or, under USE_AS_STPCPY, the address of the copied NUL.  */
1876 .p2align 4
1877L(Exit3):
1878 movw (%ecx), %ax
1879 movw %ax, (%edx)
1880 movb 2(%ecx), %al
1881 movb %al, 2(%edx)
1882# ifdef USE_AS_STPCPY
1883 lea 2(%edx), %eax
1884# else
1885 movl %edi, %eax
1886# endif
1887 RETURN
1888
/* L(Exit4): the NUL is byte 3 of the piece at (%ecx).  Copy 4 bytes with
   one 32-bit move and return %edi (the original destination) or, under
   USE_AS_STPCPY, the address of the copied NUL.  */
1889 .p2align 4
1890L(Exit4):
1891 movl (%ecx), %eax
1892 movl %eax, (%edx)
1893# ifdef USE_AS_STPCPY
1894 lea 3(%edx), %eax
1895# else
1896 movl %edi, %eax
1897# endif
1898 RETURN
1899
/* L(Exit5): the NUL is byte 4 of the piece at (%ecx).  Copy 5 bytes as a
   32-bit move plus a byte move and return %edi (the original destination)
   or, under USE_AS_STPCPY, the address of the copied NUL.  */
1900 .p2align 4
1901L(Exit5):
1902 movl (%ecx), %eax
1903 movl %eax, (%edx)
1904 movb 4(%ecx), %al
1905 movb %al, 4(%edx)
1906# ifdef USE_AS_STPCPY
1907 lea 4(%edx), %eax
1908# else
1909 movl %edi, %eax
1910# endif
1911 RETURN
1912
/* L(Exit6): the NUL is byte 5 of the piece at (%ecx).  Copy 6 bytes as a
   32-bit move plus a 16-bit move and return %edi (the original
   destination) or, under USE_AS_STPCPY, the address of the copied NUL.  */
1913 .p2align 4
1914L(Exit6):
1915 movl (%ecx), %eax
1916 movl %eax, (%edx)
1917 movw 4(%ecx), %ax
1918 movw %ax, 4(%edx)
1919# ifdef USE_AS_STPCPY
1920 lea 5(%edx), %eax
1921# else
1922 movl %edi, %eax
1923# endif
1924 RETURN
1925
/* L(Exit7): the NUL is byte 6 of the piece at (%ecx).  Copy 7 bytes with
   two overlapping 32-bit moves (offsets 0 and 3) and return %edi (the
   original destination) or, under USE_AS_STPCPY, the address of the copied
   NUL.  */
1926 .p2align 4
1927L(Exit7):
1928 movl (%ecx), %eax
1929 movl %eax, (%edx)
1930 movl 3(%ecx), %eax
1931 movl %eax, 3(%edx)
1932# ifdef USE_AS_STPCPY
1933 lea 6(%edx), %eax
1934# else
1935 movl %edi, %eax
1936# endif
1937 RETURN
1938
/* L(Exit9): the NUL is byte 8 of the piece at (%ecx).  Copy 9 bytes as two
   32-bit moves plus a byte move and return %edi (the original destination)
   or, under USE_AS_STPCPY, the address of the copied NUL.  */
1939 .p2align 4
1940L(Exit9):
1941 movl (%ecx), %eax
1942 movl %eax, (%edx)
1943 movl 4(%ecx), %eax
1944 movl %eax, 4(%edx)
1945 movb 8(%ecx), %al
1946 movb %al, 8(%edx)
1947# ifdef USE_AS_STPCPY
1948 lea 8(%edx), %eax
1949# else
1950 movl %edi, %eax
1951# endif
1952 RETURN
1953
/* L(Exit10): the NUL is byte 9 of the piece at (%ecx).  Copy 10 bytes as
   two 32-bit moves plus a 16-bit move and return %edi (the original
   destination) or, under USE_AS_STPCPY, the address of the copied NUL.  */
1954 .p2align 4
1955L(Exit10):
1956 movl (%ecx), %eax
1957 movl %eax, (%edx)
1958 movl 4(%ecx), %eax
1959 movl %eax, 4(%edx)
1960 movw 8(%ecx), %ax
1961 movw %ax, 8(%edx)
1962# ifdef USE_AS_STPCPY
1963 lea 9(%edx), %eax
1964# else
1965 movl %edi, %eax
1966# endif
1967 RETURN
1968
/* L(Exit11): the NUL is byte 10 of the piece at (%ecx).  Copy 11 bytes
   with three 32-bit moves at offsets 0, 4 and 7 (the last one overlaps the
   second) and return %edi (the original destination) or, under
   USE_AS_STPCPY, the address of the copied NUL.  */
1969 .p2align 4
1970L(Exit11):
1971 movl (%ecx), %eax
1972 movl %eax, (%edx)
1973 movl 4(%ecx), %eax
1974 movl %eax, 4(%edx)
1975 movl 7(%ecx), %eax
1976 movl %eax, 7(%edx)
1977# ifdef USE_AS_STPCPY
1978 lea 10(%edx), %eax
1979# else
1980 movl %edi, %eax
1981# endif
1982 RETURN
1983
/* L(Exit12): the NUL is byte 11 of the piece at (%ecx).  Copy 12 bytes
   with three 32-bit moves and return %edi (the original destination) or,
   under USE_AS_STPCPY, the address of the copied NUL.  */
1984 .p2align 4
1985L(Exit12):
1986 movl (%ecx), %eax
1987 movl %eax, (%edx)
1988 movl 4(%ecx), %eax
1989 movl %eax, 4(%edx)
1990 movl 8(%ecx), %eax
1991 movl %eax, 8(%edx)
1992# ifdef USE_AS_STPCPY
1993 lea 11(%edx), %eax
1994# else
1995 movl %edi, %eax
1996# endif
1997 RETURN
1998
/* L(Exit13): the NUL is byte 12 of the piece at (%ecx).  Copy 13 bytes
   with two overlapping 8-byte movlpd moves (offsets 0 and 5), using the
   low half of %xmm0 as scratch, and return %edi (the original destination)
   or, under USE_AS_STPCPY, the address of the copied NUL.  */
1999 .p2align 4
2000L(Exit13):
2001 movlpd (%ecx), %xmm0
2002 movlpd %xmm0, (%edx)
2003 movlpd 5(%ecx), %xmm0
2004 movlpd %xmm0, 5(%edx)
2005# ifdef USE_AS_STPCPY
2006 lea 12(%edx), %eax
2007# else
2008 movl %edi, %eax
2009# endif
2010 RETURN
2011
/* L(Exit14): the NUL is byte 13 of the piece at (%ecx).  Copy 14 bytes
   with two overlapping 8-byte movlpd moves (offsets 0 and 6), using the
   low half of %xmm0 as scratch, and return %edi (the original destination)
   or, under USE_AS_STPCPY, the address of the copied NUL.  */
2012 .p2align 4
2013L(Exit14):
2014 movlpd (%ecx), %xmm0
2015 movlpd %xmm0, (%edx)
2016 movlpd 6(%ecx), %xmm0
2017 movlpd %xmm0, 6(%edx)
2018# ifdef USE_AS_STPCPY
2019 lea 13(%edx), %eax
2020# else
2021 movl %edi, %eax
2022# endif
2023 RETURN
2024
/* L(Exit15): the NUL is byte 14 of the piece at (%ecx).  Copy 15 bytes
   with two overlapping 8-byte movlpd moves (offsets 0 and 7), using the
   low half of %xmm0 as scratch, and return %edi (the original destination)
   or, under USE_AS_STPCPY, the address of the copied NUL.  */
2025 .p2align 4
2026L(Exit15):
2027 movlpd (%ecx), %xmm0
2028 movlpd %xmm0, (%edx)
2029 movlpd 7(%ecx), %xmm0
2030 movlpd %xmm0, 7(%edx)
2031# ifdef USE_AS_STPCPY
2032 lea 14(%edx), %eax
2033# else
2034 movl %edi, %eax
2035# endif
2036 RETURN
2037
/* The L(ExitTail<n>) paths below are entered from the byte checks at the
   top of STRCPY, before %edi is pushed.  The preceding RETURN ends with
   CFI_PUSH (%edi), so this CFI_POP (which emits no instruction) switches
   the unwind information back to "nothing pushed" for the code that
   follows.  */
2038CFI_POP (%edi)
2039
/* L(ExitTail1): the first source byte is the NUL.  Copy that one byte and
   return the destination in %eax; the same value serves for
   USE_AS_STPCPY because the copied NUL sits at the destination itself.  */
2040 .p2align 4
2041L(ExitTail1):
2042 movb (%ecx), %al
2043 movb %al, (%edx)
2044 movl %edx, %eax
2045 RETURN1
2046
/* L(ExitTail2): source byte 1 is the first NUL (found by the entry checks,
   nothing is pushed).  Copy 2 bytes with one 16-bit move and return the
   destination %edx or, under USE_AS_STPCPY, the address of the copied
   NUL.  */
2047 .p2align 4
2048L(ExitTail2):
2049 movw (%ecx), %ax
2050 movw %ax, (%edx)
2051# ifdef USE_AS_STPCPY
2052 lea 1(%edx), %eax
2053# else
2054 movl %edx, %eax
2055# endif
2056 RETURN1
2057
/* L(ExitTail3): source byte 2 is the first NUL (found by the entry checks,
   nothing is pushed).  Copy 3 bytes as a 16-bit move plus a byte move and
   return the destination %edx or, under USE_AS_STPCPY, the address of the
   copied NUL.  */
2058 .p2align 4
2059L(ExitTail3):
2060 movw (%ecx), %ax
2061 movw %ax, (%edx)
2062 movb 2(%ecx), %al
2063 movb %al, 2(%edx)
2064# ifdef USE_AS_STPCPY
2065 lea 2(%edx), %eax
2066# else
2067 movl %edx, %eax
2068# endif
2069 RETURN1
2070
/* L(ExitTail4): source byte 3 is the first NUL (found by the entry checks,
   nothing is pushed).  Copy 4 bytes with one 32-bit move and return the
   destination %edx or, under USE_AS_STPCPY, the address of the copied
   NUL.  */
2071 .p2align 4
2072L(ExitTail4):
2073 movl (%ecx), %eax
2074 movl %eax, (%edx)
2075# ifdef USE_AS_STPCPY
2076 lea 3(%edx), %eax
2077# else
2078 movl %edx, %eax
2079# endif
2080 RETURN1
2081
/* L(ExitTail5): source byte 4 is the first NUL (found by the entry checks,
   nothing is pushed).  Copy 5 bytes as a 32-bit move plus a byte move and
   return the destination %edx or, under USE_AS_STPCPY, the address of the
   copied NUL.  */
2082 .p2align 4
2083L(ExitTail5):
2084 movl (%ecx), %eax
2085 movl %eax, (%edx)
2086 movb 4(%ecx), %al
2087 movb %al, 4(%edx)
2088# ifdef USE_AS_STPCPY
2089 lea 4(%edx), %eax
2090# else
2091 movl %edx, %eax
2092# endif
2093 RETURN1
2094
/* L(ExitTail6): source byte 5 is the first NUL (found by the entry checks,
   nothing is pushed).  Copy 6 bytes as a 32-bit move plus a 16-bit move
   and return the destination %edx or, under USE_AS_STPCPY, the address of
   the copied NUL.  */
2095 .p2align 4
2096L(ExitTail6):
2097 movl (%ecx), %eax
2098 movl %eax, (%edx)
2099 movw 4(%ecx), %ax
2100 movw %ax, 4(%edx)
2101# ifdef USE_AS_STPCPY
2102 lea 5(%edx), %eax
2103# else
2104 movl %edx, %eax
2105# endif
2106 RETURN1
2107
/* L(ExitTail7): source byte 6 is the first NUL (found by the entry checks,
   nothing is pushed).  Copy 7 bytes with two overlapping 32-bit moves
   (offsets 0 and 3) and return the destination %edx or, under
   USE_AS_STPCPY, the address of the copied NUL.  */
2108 .p2align 4
2109L(ExitTail7):
2110 movl (%ecx), %eax
2111 movl %eax, (%edx)
2112 movl 3(%ecx), %eax
2113 movl %eax, 3(%edx)
2114# ifdef USE_AS_STPCPY
2115 lea 6(%edx), %eax
2116# else
2117 movl %edx, %eax
2118# endif
2119 RETURN1
2120
/* L(ExitTail8): source byte 7 is the first NUL (found by the entry checks,
   nothing is pushed).  Copy 8 bytes with two 32-bit moves and return the
   destination %edx or, under USE_AS_STPCPY, the address of the copied
   NUL.  */
2121 .p2align 4
2122L(ExitTail8):
2123 movl (%ecx), %eax
2124 movl %eax, (%edx)
2125 movl 4(%ecx), %eax
2126 movl %eax, 4(%edx)
2127# ifdef USE_AS_STPCPY
2128 lea 7(%edx), %eax
2129# else
2130 movl %edx, %eax
2131# endif
2132 RETURN1
2133
/* L(ExitTail9): source byte 8 is the first NUL (found by the entry checks,
   nothing is pushed).  Copy 9 bytes as two 32-bit moves plus a byte move
   and return the destination %edx or, under USE_AS_STPCPY, the address of
   the copied NUL.  */
2134 .p2align 4
2135L(ExitTail9):
2136 movl (%ecx), %eax
2137 movl %eax, (%edx)
2138 movl 4(%ecx), %eax
2139 movl %eax, 4(%edx)
2140 movb 8(%ecx), %al
2141 movb %al, 8(%edx)
2142# ifdef USE_AS_STPCPY
2143 lea 8(%edx), %eax
2144# else
2145 movl %edx, %eax
2146# endif
2147 RETURN1
2148
/* L(ExitTail10): source byte 9 is the first NUL (found by the entry
   checks, nothing is pushed).  Copy 10 bytes as two 32-bit moves plus a
   16-bit move and return the destination %edx or, under USE_AS_STPCPY, the
   address of the copied NUL.  */
2149 .p2align 4
2150L(ExitTail10):
2151 movl (%ecx), %eax
2152 movl %eax, (%edx)
2153 movl 4(%ecx), %eax
2154 movl %eax, 4(%edx)
2155 movw 8(%ecx), %ax
2156 movw %ax, 8(%edx)
2157# ifdef USE_AS_STPCPY
2158 lea 9(%edx), %eax
2159# else
2160 movl %edx, %eax
2161# endif
2162 RETURN1
2163
/* L(ExitTail11): source byte 10 is the first NUL (found by the entry
   checks, nothing is pushed).  Copy 11 bytes with three 32-bit moves at
   offsets 0, 4 and 7 (the last one overlaps the second) and return the
   destination %edx or, under USE_AS_STPCPY, the address of the copied
   NUL.  */
2164 .p2align 4
2165L(ExitTail11):
2166 movl (%ecx), %eax
2167 movl %eax, (%edx)
2168 movl 4(%ecx), %eax
2169 movl %eax, 4(%edx)
2170 movl 7(%ecx), %eax
2171 movl %eax, 7(%edx)
2172# ifdef USE_AS_STPCPY
2173 lea 10(%edx), %eax
2174# else
2175 movl %edx, %eax
2176# endif
2177 RETURN1
2178
/* L(ExitTail12): source byte 11 is the first NUL (found by the entry
   checks, nothing is pushed).  Copy 12 bytes with three 32-bit moves and
   return the destination %edx or, under USE_AS_STPCPY, the address of the
   copied NUL.  */
2179 .p2align 4
2180L(ExitTail12):
2181 movl (%ecx), %eax
2182 movl %eax, (%edx)
2183 movl 4(%ecx), %eax
2184 movl %eax, 4(%edx)
2185 movl 8(%ecx), %eax
2186 movl %eax, 8(%edx)
2187# ifdef USE_AS_STPCPY
2188 lea 11(%edx), %eax
2189# else
2190 movl %edx, %eax
2191# endif
2192 RETURN1
2193
/* L(ExitTail13): source byte 12 is the first NUL (found by the entry
   checks, nothing is pushed).  Copy 13 bytes with two overlapping 8-byte
   movlpd moves (offsets 0 and 5) through the low half of %xmm0 and return
   the destination %edx or, under USE_AS_STPCPY, the address of the copied
   NUL.  */
2194 .p2align 4
2195L(ExitTail13):
2196 movlpd (%ecx), %xmm0
2197 movlpd %xmm0, (%edx)
2198 movlpd 5(%ecx), %xmm0
2199 movlpd %xmm0, 5(%edx)
2200# ifdef USE_AS_STPCPY
2201 lea 12(%edx), %eax
2202# else
2203 movl %edx, %eax
2204# endif
2205 RETURN1
2206
/* L(ExitTail14): source byte 13 is the first NUL (found by the entry
   checks, nothing is pushed).  Copy 14 bytes with two overlapping 8-byte
   movlpd moves (offsets 0 and 6) through the low half of %xmm0 and return
   the destination %edx or, under USE_AS_STPCPY, the address of the copied
   NUL.  */
2207 .p2align 4
2208L(ExitTail14):
2209 movlpd (%ecx), %xmm0
2210 movlpd %xmm0, (%edx)
2211 movlpd 6(%ecx), %xmm0
2212 movlpd %xmm0, 6(%edx)
2213# ifdef USE_AS_STPCPY
2214 lea 13(%edx), %eax
2215# else
2216 movl %edx, %eax
2217# endif
2218 RETURN1
2219
/* L(ExitTail15): source byte 14 is the first NUL (found by the entry
   checks, nothing is pushed).  Copy 15 bytes with two overlapping 8-byte
   movlpd moves (offsets 0 and 7) through the low half of %xmm0 and return
   the destination %edx or, under USE_AS_STPCPY, the address of the copied
   NUL.  */
2220 .p2align 4
2221L(ExitTail15):
2222 movlpd (%ecx), %xmm0
2223 movlpd %xmm0, (%edx)
2224 movlpd 7(%ecx), %xmm0
2225 movlpd %xmm0, 7(%edx)
2226# ifdef USE_AS_STPCPY
2227 lea 14(%edx), %eax
2228# else
2229 movl %edx, %eax
2230# endif
2231 RETURN1
2232
/* L(ExitTail16): source byte 15 is the first NUL (found by the entry
   checks, nothing is pushed).  Copy all 16 bytes with two 8-byte movlpd
   moves through the low half of %xmm0 and return the destination %edx or,
   under USE_AS_STPCPY, the address of the copied NUL.  This is the last
   exit path; END closes the non-strncpy variant of STRCPY.  */
2233 .p2align 4
2234L(ExitTail16):
2235 movlpd (%ecx), %xmm0
2236 movlpd %xmm0, (%edx)
2237 movlpd 8(%ecx), %xmm0
2238 movlpd %xmm0, 8(%edx)
2239# ifdef USE_AS_STPCPY
2240 lea 15(%edx), %eax
2241# else
2242 movl %edx, %eax
2243# endif
2244 RETURN1
2245
2246END (STRCPY)
2247# endif
2248
2249#endif
2250

/* Source: glibc/sysdeps/i386/i686/multiarch/strcpy-sse2.S */