1/* strcpy with SSSE3
2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19
20#if IS_IN (libc)
21
22# ifndef USE_AS_STRCAT
23# include <sysdep.h>
24
/* Unwind-annotation helpers.
   CFI_PUSH (REG) emits the CFI directives describing a 4-byte push of REG:
   the CFA offset grows by 4 and REG is recorded as saved at offset 0.
   CFI_POP (REG) emits the inverse: the CFA offset shrinks by 4 and REG is
   marked as restored.
   PUSH/POP pair the real pushl/popl instruction with the matching
   annotation, so every stack adjustment made through them is described.  */
25# define CFI_PUSH(REG) \
26 cfi_adjust_cfa_offset (4); \
27 cfi_rel_offset (REG, 0)
28
29# define CFI_POP(REG) \
30 cfi_adjust_cfa_offset (-4); \
31 cfi_restore (REG)
32
33# define PUSH(REG) pushl REG; CFI_PUSH (REG)
34# define POP(REG) popl REG; CFI_POP (REG)
35
/* Symbol name of the routine.  STRCPY may already be defined when this point
   is reached, in which case that name is used; otherwise the routine is
   emitted as __strcpy_ssse3.  */
36# ifndef STRCPY
37# define STRCPY __strcpy_ssse3
38# endif
39
/* Entry/exit sequences for the two stack layouts.
   PARMS    - offset from %esp of the first stack argument once ENTRANCE has
              run: 8 when ENTRANCE pushes %ebx (strncpy variant), else 4.
   ENTRANCE - prologue; saves %ebx in the strncpy variant (where %ebx is
              loaded with the length argument), empty otherwise.
   RETURN   - return when only the ENTRANCE push (if any) is outstanding.
   RETURN1  - return when %edi has been pushed as well.
   The CFI_PUSH annotations placed after each `ret' emit directives only (no
   instructions); presumably they re-establish the unwind description for
   code that follows the return in the file -- NOTE(review): confirm.  */
40# ifdef USE_AS_STRNCPY
41# define PARMS 8
42# define ENTRANCE PUSH (%ebx)
43# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
44# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
45# else
46# define PARMS 4
47# define ENTRANCE
48# define RETURN ret
49# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
50# endif
51
/* Load the function result into %eax.
   With USE_AS_STPCPY both macros compute %edx + n.
   Otherwise the argument n is ignored: SAVE_RESULT copies %edi (which the
   entry code loads with the original destination pointer) and
   SAVE_RESULT_TAIL copies %edx.  */
52# ifdef USE_AS_STPCPY
53# define SAVE_RESULT(n) lea n(%edx), %eax
54# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
55# else
56# define SAVE_RESULT(n) movl %edi, %eax
57# define SAVE_RESULT_TAIL(n) movl %edx, %eax
58# endif
59
/* %esp-relative offsets of the stack arguments, 4 bytes apart:
   STR1 is loaded into %edx (the pointer that is stored through),
   STR2 is loaded into %ecx (the pointer that is read from),
   LEN  is loaded into %ebx (strncpy variant only).  */
60# define STR1 PARMS
61# define STR2 STR1+4
62# define LEN STR2+4
63
64/* In this code the following instructions are used for copying:
65 movb - 1 byte
66 movw - 2 bytes
67 movl - 4 bytes
68 movlpd - 8 bytes
69 movaps - 16 bytes - requires 16-byte alignment
70 of source and destination addresses.
71*/
72
73.text
/* Function entry, short-string checks and alignment dispatch.

   Register roles established here:
     %edx - destination pointer (stored through)
     %ecx - source pointer (read from)
     %ebx - length bookkeeping, strncpy variant only
     %edi - copy of the original destination (saved after the byte checks)
     %esi - scratch / byte offset handed to the exit paths
     %xmm0 - all-zero vector used for NUL detection

   The first 16 source bytes are tested one at a time; a NUL among them
   leaves through one of the L(ExitTailN) labels (defined outside this
   view).  Otherwise the first 16 bytes are copied with two 8-byte moves,
   the destination is rounded up to a 16-byte boundary, and control goes to
   L(Align16Both) or L(ShlN) according to the source's offset within its
   16-byte block.  */
74ENTRY (STRCPY)
75 ENTRANCE
76 mov STR1(%esp), %edx
77 mov STR2(%esp), %ecx
78# ifdef USE_AS_STRNCPY
/* Lengths of at most 8 are handled separately.  */
79 movl LEN(%esp), %ebx
80 cmp $8, %ebx
81 jbe L(StrncpyExit8Bytes)
82# endif
/* Test source bytes 0..7 for the terminating NUL.  */
83 cmpb $0, (%ecx)
84 jz L(ExitTail1)
85 cmpb $0, 1(%ecx)
86 jz L(ExitTail2)
87 cmpb $0, 2(%ecx)
88 jz L(ExitTail3)
89 cmpb $0, 3(%ecx)
90 jz L(ExitTail4)
91 cmpb $0, 4(%ecx)
92 jz L(ExitTail5)
93 cmpb $0, 5(%ecx)
94 jz L(ExitTail6)
95 cmpb $0, 6(%ecx)
96 jz L(ExitTail7)
97 cmpb $0, 7(%ecx)
98 jz L(ExitTail8)
99# ifdef USE_AS_STRNCPY
/* Lengths below 16 (unsigned) are handled separately.  */
100 cmp $16, %ebx
101 jb L(StrncpyExit15Bytes)
102# endif
/* Test source bytes 8..14.  */
103 cmpb $0, 8(%ecx)
104 jz L(ExitTail9)
105 cmpb $0, 9(%ecx)
106 jz L(ExitTail10)
107 cmpb $0, 10(%ecx)
108 jz L(ExitTail11)
109 cmpb $0, 11(%ecx)
110 jz L(ExitTail12)
111 cmpb $0, 12(%ecx)
112 jz L(ExitTail13)
113 cmpb $0, 13(%ecx)
114 jz L(ExitTail14)
115 cmpb $0, 14(%ecx)
116 jz L(ExitTail15)
117# ifdef USE_AS_STRNCPY
/* A length of exactly 16 takes the 16-byte exit without looking at
   byte 15.  */
118 cmp $16, %ebx
119 je L(ExitTail16)
120# endif
121 cmpb $0, 15(%ecx)
122 jz L(ExitTail16)
123
/* No NUL in the first 16 bytes.  Save %edi and keep the original
   destination in it (SAVE_RESULT reads %edi in the non-stpcpy variant).  */
124 PUSH (%edi)
125 mov %edx, %edi
/* Closes the USE_AS_STRCAT conditional opened before the macro
   definitions: everything above is omitted when USE_AS_STRCAT is
   defined.  */
126# endif
127 PUSH (%esi)
128# ifdef USE_AS_STRNCPY
129 mov %ecx, %esi
130 sub $16, %ebx
131 and $0xf, %esi
132
133/* %ebx = length - 16 + (source address & 15) */
134
135 add %esi, %ebx
136# endif
/* %esi = first 16-byte boundary strictly above the source pointer.  */
137 lea 16(%ecx), %esi
138 and $-16, %esi
139 pxor %xmm0, %xmm0
/* Copy source bytes 0..7.  */
140 movlpd (%ecx), %xmm1
141 movlpd %xmm1, (%edx)
142
/* Compare the aligned 16-byte block at %esi against zero, and meanwhile
   copy source bytes 8..15.  */
143 pcmpeqb (%esi), %xmm0
144 movlpd 8(%ecx), %xmm1
145 movlpd %xmm1, 8(%edx)
146
/* %eax = NUL mask for that block; %esi becomes the block's byte offset
   from the source pointer (1..16).  */
147 pmovmskb %xmm0, %eax
148 sub %ecx, %esi
149
150# ifdef USE_AS_STRNCPY
151 sub $16, %ebx
152 jbe L(CopyFrom1To16BytesCase2OrCase3)
153# endif
/* When the mask is zero every byte of %xmm0 is zero again, so %xmm0 keeps
   serving as the zero vector on the fall-through path.  */
154 test %eax, %eax
155 jnz L(CopyFrom1To16Bytes)
156
/* Round the destination up to the next 16-byte boundary.
   %eax = old %edx - new %edx, i.e. minus the number of bytes skipped
   (those bytes were already written by the two 8-byte stores above).  */
157 mov %edx, %eax
158 lea 16(%edx), %edx
159 and $-16, %edx
160 sub %edx, %eax
161
162# ifdef USE_AS_STRNCPY
/* If (%esi + %eax - 1) is negative keep %ebx as it is, otherwise add 16
   back to %ebx.  */
163 add %eax, %esi
164 lea -1(%esi), %esi
165 and $1<<31, %esi
166 test %esi, %esi
167 jnz L(ContinueCopy)
168 lea 16(%ebx), %ebx
169
170L(ContinueCopy):
171# endif
/* Advance the source by the amount the destination advanced, then take
   the source's offset within its 16-byte block.  `mov $0' is used to
   clear %esi because, unlike xor, it leaves the flags set by `and'
   intact for the jz below.  */
172 sub %eax, %ecx
173 mov %ecx, %eax
174 and $0xf, %eax
175 mov $0, %esi
176
177/* case: ecx_offset == edx_offset */
178
179 jz L(Align16Both)
180
/* Source is misaligned by %eax (1..15) relative to the aligned
   destination: select the matching palignr variant.  */
181 cmp $8, %eax
182 jae L(ShlHigh8)
183 cmp $1, %eax
184 je L(Shl1)
185 cmp $2, %eax
186 je L(Shl2)
187 cmp $3, %eax
188 je L(Shl3)
189 cmp $4, %eax
190 je L(Shl4)
191 cmp $5, %eax
192 je L(Shl5)
193 cmp $6, %eax
194 je L(Shl6)
195 jmp L(Shl7)
196
197L(ShlHigh8):
/* Flags still come from `cmp $8, %eax' above (jae does not change
   them).  */
198 je L(Shl8)
199 cmp $9, %eax
200 je L(Shl9)
201 cmp $10, %eax
202 je L(Shl10)
203 cmp $11, %eax
204 je L(Shl11)
205 cmp $12, %eax
206 je L(Shl12)
207 cmp $13, %eax
208 je L(Shl13)
209 cmp $14, %eax
210 je L(Shl14)
211 jmp L(Shl15)
/* L(Align16Both): source (%ecx) and destination (%edx) are accessed with
   movaps, so both are 16-byte aligned here.  %esi is the running byte
   offset (0 on entry from the dispatch code) and %xmm0 is all-zero.

   Six unrolled rounds follow.  Each round loads the next 16 source bytes,
   stores the previously loaded block, and tests the new block for a NUL.
   On a NUL, control goes to L(CopyFrom1To16Bytes) (outside this view) with
   the mask in %eax and %esi holding the offset of the block that contains
   it.  If all rounds pass, the pointers are prepared for the 64-byte
   loop.  */
213L(Align16Both):
214 movaps (%ecx), %xmm1
215 movaps 16(%ecx), %xmm2
216 movaps %xmm1, (%edx)
217 pcmpeqb %xmm2, %xmm0
218 pmovmskb %xmm0, %eax
219 lea 16(%esi), %esi
220# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
221 sub $16, %ebx
222 jbe L(CopyFrom1To16BytesCase2OrCase3)
223# endif
224 test %eax, %eax
225 jnz L(CopyFrom1To16Bytes)
226
227 movaps 16(%ecx, %esi), %xmm3
228 movaps %xmm2, (%edx, %esi)
229 pcmpeqb %xmm3, %xmm0
230 pmovmskb %xmm0, %eax
231 lea 16(%esi), %esi
232# ifdef USE_AS_STRNCPY
233 sub $16, %ebx
234 jbe L(CopyFrom1To16BytesCase2OrCase3)
235# endif
236 test %eax, %eax
237 jnz L(CopyFrom1To16Bytes)
238
239 movaps 16(%ecx, %esi), %xmm4
240 movaps %xmm3, (%edx, %esi)
241 pcmpeqb %xmm4, %xmm0
242 pmovmskb %xmm0, %eax
243 lea 16(%esi), %esi
244# ifdef USE_AS_STRNCPY
245 sub $16, %ebx
246 jbe L(CopyFrom1To16BytesCase2OrCase3)
247# endif
248 test %eax, %eax
249 jnz L(CopyFrom1To16Bytes)
250
251 movaps 16(%ecx, %esi), %xmm1
252 movaps %xmm4, (%edx, %esi)
253 pcmpeqb %xmm1, %xmm0
254 pmovmskb %xmm0, %eax
255 lea 16(%esi), %esi
256# ifdef USE_AS_STRNCPY
257 sub $16, %ebx
258 jbe L(CopyFrom1To16BytesCase2OrCase3)
259# endif
260 test %eax, %eax
261 jnz L(CopyFrom1To16Bytes)
262
263 movaps 16(%ecx, %esi), %xmm2
264 movaps %xmm1, (%edx, %esi)
265 pcmpeqb %xmm2, %xmm0
266 pmovmskb %xmm0, %eax
267 lea 16(%esi), %esi
268# ifdef USE_AS_STRNCPY
269 sub $16, %ebx
270 jbe L(CopyFrom1To16BytesCase2OrCase3)
271# endif
272 test %eax, %eax
273 jnz L(CopyFrom1To16Bytes)
274
275 movaps 16(%ecx, %esi), %xmm3
276 movaps %xmm2, (%edx, %esi)
277 pcmpeqb %xmm3, %xmm0
278 pmovmskb %xmm0, %eax
279 lea 16(%esi), %esi
280# ifdef USE_AS_STRNCPY
281 sub $16, %ebx
282 jbe L(CopyFrom1To16BytesCase2OrCase3)
283# endif
284 test %eax, %eax
285 jnz L(CopyFrom1To16Bytes)
286
/* Store the last checked block, then set up the 64-byte loop:
   %ecx = (%ecx + %esi + 16) rounded down to a 64-byte boundary,
   %eax = old %ecx - new %ecx, and %edx is moved by the same distance so
   the source/destination relationship is unchanged.  Because of the
   rounding down, the loop may start on bytes that were already copied.  */
287 movaps %xmm3, (%edx, %esi)
288 mov %ecx, %eax
289 lea 16(%ecx, %esi), %ecx
290 and $-0x40, %ecx
291 sub %ecx, %eax
292 sub %eax, %edx
293# ifdef USE_AS_STRNCPY
/* strncpy only: %ebx = %ebx + %eax + 112.  */
294 lea 112(%ebx, %eax), %ebx
295# endif
/* %esi = -64: offset of the first 16-byte block of a 64-byte group
   relative to the already-advanced pointers used by L(Aligned64Leave).  */
296 mov $-0x40, %esi
297
/* L(Aligned64Loop): main loop for the mutually aligned case, 64 bytes per
   iteration.  Four aligned blocks are loaded into %xmm4..%xmm7 (working
   copies in %xmm2/%xmm3); pminub folds them into one vector whose byte is
   zero wherever any of the four blocks has a zero byte, so one
   pcmpeqb/pmovmskb against the zero vector %xmm0 detects a NUL anywhere in
   the 64 bytes.  Both pointers are advanced by 64 before the test, which
   is why the stores use negative offsets.  */
298L(Aligned64Loop):
299 movaps (%ecx), %xmm2
300 movaps 32(%ecx), %xmm3
301 movaps %xmm2, %xmm4
302 movaps 16(%ecx), %xmm5
303 movaps %xmm3, %xmm6
304 movaps 48(%ecx), %xmm7
305 pminub %xmm5, %xmm2
306 pminub %xmm7, %xmm3
307 pminub %xmm2, %xmm3
308 lea 64(%edx), %edx
309 pcmpeqb %xmm0, %xmm3
310 lea 64(%ecx), %ecx
311 pmovmskb %xmm3, %eax
312# ifdef USE_AS_STRNCPY
/* strncpy only: take 64 off %ebx; leave when it reaches zero or wraps.  */
313 sub $64, %ebx
314 jbe L(StrncpyLeaveCase2OrCase3)
315# endif
316 test %eax, %eax
317 jnz L(Aligned64Leave)
/* No NUL in this group: store all four blocks and continue.  */
318 movaps %xmm4, -64(%edx)
319 movaps %xmm5, -48(%edx)
320 movaps %xmm6, -32(%edx)
321 movaps %xmm7, -16(%edx)
322 jmp L(Aligned64Loop)
323
/* L(Aligned64Leave): the 64-byte group held in %xmm4..%xmm7 contains a
   NUL.  The four blocks are tested in order; each block found clean is
   stored and %esi (-64 on entry, relative to the already-advanced
   pointers) moves on by 16.  Control leaves for L(CopyFrom1To16Bytes)
   (outside this view) with the NUL mask in %eax and %esi giving the offset
   of the block that contains it.  %xmm0 stays all-zero after each
   comparison that finds nothing.  */
324L(Aligned64Leave):
325# ifdef USE_AS_STRNCPY
/* strncpy only: add 48 to %ebx now; 16 is taken off again for each
   further block examined below.  */
326 lea 48(%ebx), %ebx
327# endif
328 pcmpeqb %xmm4, %xmm0
329 pmovmskb %xmm0, %eax
330 test %eax, %eax
331 jnz L(CopyFrom1To16Bytes)
332
333 pcmpeqb %xmm5, %xmm0
334# ifdef USE_AS_STRNCPY
335 lea -16(%ebx), %ebx
336# endif
337 pmovmskb %xmm0, %eax
338 movaps %xmm4, -64(%edx)
/* lea does not modify the flags, so jnz still acts on the test.  */
339 test %eax, %eax
340 lea 16(%esi), %esi
341 jnz L(CopyFrom1To16Bytes)
342
343 pcmpeqb %xmm6, %xmm0
344# ifdef USE_AS_STRNCPY
345 lea -16(%ebx), %ebx
346# endif
347 pmovmskb %xmm0, %eax
348 movaps %xmm5, -48(%edx)
349 test %eax, %eax
350 lea 16(%esi), %esi
351 jnz L(CopyFrom1To16Bytes)
352
/* The first three blocks are clean, so the NUL is in the fourth: no test
   is needed before the jump.  */
353 movaps %xmm6, -32(%edx)
354 pcmpeqb %xmm7, %xmm0
355# ifdef USE_AS_STRNCPY
356 lea -16(%ebx), %ebx
357# endif
358 pmovmskb %xmm0, %eax
359 lea 16(%esi), %esi
360 jmp L(CopyFrom1To16Bytes)
361
/* L(Shl1): destination (%edx) is 16-byte aligned and the source (%ecx) is
   1 byte past a 16-byte boundary.  Source data is read with aligned loads
   at %ecx - 1 + 16*k and shifted into place with `palignr $1' before each
   aligned 16-byte store.
   Invariants at L(Shl1Start): %xmm0 is all-zero, %xmm1 is the aligned
   block at -1(%ecx), %xmm2 the aligned block at 15(%ecx).  */
362 .p2align 4
363L(Shl1):
364 movaps -1(%ecx), %xmm1
365 movaps 15(%ecx), %xmm2
366L(Shl1Start):
/* Test the newer block for a NUL; keep an unshifted copy in %xmm3.  */
367 pcmpeqb %xmm2, %xmm0
368 pmovmskb %xmm0, %eax
369 movaps %xmm2, %xmm3
370# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
371 sub $16, %ebx
372 jbe L(StrncpyExit1Case2OrCase3)
373# endif
374 test %eax, %eax
375 jnz L(Shl1LoopExit)
376
/* No NUL: %xmm2 = 16 source bytes starting at %ecx; store them and fetch
   the next aligned block.  */
377 palignr $1, %xmm1, %xmm2
378 movaps %xmm3, %xmm1
379 movaps %xmm2, (%edx)
380 movaps 31(%ecx), %xmm2
381
382 pcmpeqb %xmm2, %xmm0
383 lea 16(%edx), %edx
384 pmovmskb %xmm0, %eax
385 lea 16(%ecx), %ecx
386 movaps %xmm2, %xmm3
387# ifdef USE_AS_STRNCPY
388 sub $16, %ebx
389 jbe L(StrncpyExit1Case2OrCase3)
390# endif
391 test %eax, %eax
392 jnz L(Shl1LoopExit)
393
394 palignr $1, %xmm1, %xmm2
395 movaps %xmm2, (%edx)
396 movaps 31(%ecx), %xmm2
397 movaps %xmm3, %xmm1
398
399 pcmpeqb %xmm2, %xmm0
400 lea 16(%edx), %edx
401 pmovmskb %xmm0, %eax
402 lea 16(%ecx), %ecx
403 movaps %xmm2, %xmm3
404# ifdef USE_AS_STRNCPY
405 sub $16, %ebx
406 jbe L(StrncpyExit1Case2OrCase3)
407# endif
408 test %eax, %eax
409 jnz L(Shl1LoopExit)
410
411 palignr $1, %xmm1, %xmm2
412 movaps %xmm2, (%edx)
413 movaps 31(%ecx), %xmm2
414
415 pcmpeqb %xmm2, %xmm0
416 lea 16(%edx), %edx
417 pmovmskb %xmm0, %eax
418 lea 16(%ecx), %ecx
419# ifdef USE_AS_STRNCPY
420 sub $16, %ebx
421 jbe L(StrncpyExit1Case2OrCase3)
422# endif
423 test %eax, %eax
424 jnz L(Shl1LoopExit)
425
/* In this last round the previous block is still in %xmm3.  */
426 palignr $1, %xmm3, %xmm2
427 movaps %xmm2, (%edx)
428 lea 31(%ecx), %ecx
429 lea 16(%edx), %edx
430
/* Move %ecx and %edx back by the same amount (%eax) so that 15(%ecx), the
   first aligned load of the loop below, is on a 64-byte boundary.  */
431 mov %ecx, %eax
432 and $-0x40, %ecx
433 sub %ecx, %eax
434 lea -15(%ecx), %ecx
435 sub %eax, %edx
436# ifdef USE_AS_STRNCPY
437 add %eax, %ebx
438# endif
439 movaps -1(%ecx), %xmm1
440
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   aligned blocks so one pcmpeqb/pmovmskb detects a NUL in any of them.  */
441L(Shl1LoopStart):
442 movaps 15(%ecx), %xmm2
443 movaps 31(%ecx), %xmm3
444 movaps %xmm3, %xmm6
445 movaps 47(%ecx), %xmm4
446 movaps %xmm4, %xmm7
447 movaps 63(%ecx), %xmm5
448 pminub %xmm2, %xmm6
449 pminub %xmm5, %xmm7
450 pminub %xmm6, %xmm7
451 pcmpeqb %xmm0, %xmm7
452 pmovmskb %xmm7, %eax
453 movaps %xmm5, %xmm7
454 palignr $1, %xmm4, %xmm5
/* palignr does not modify EFLAGS, so jnz acts on the test.  A NUL in these
   64 bytes sends them back through the 16-byte steps at L(Shl1Start);
   %xmm1 and %xmm2 still satisfy that label's invariants.  */
455 test %eax, %eax
456 palignr $1, %xmm3, %xmm4
457 jnz L(Shl1Start)
458# ifdef USE_AS_STRNCPY
459 sub $64, %ebx
460 jbe L(StrncpyLeave1)
461# endif
462 palignr $1, %xmm2, %xmm3
463 lea 64(%ecx), %ecx
464 palignr $1, %xmm1, %xmm2
/* The unshifted last block becomes the "previous" block of the next
   iteration.  */
465 movaps %xmm7, %xmm1
466 movaps %xmm5, 48(%edx)
467 movaps %xmm4, 32(%edx)
468 movaps %xmm3, 16(%edx)
469 movaps %xmm2, (%edx)
470 lea 64(%edx), %edx
471 jmp L(Shl1LoopStart)
472
/* The block at 15(%ecx) contains the NUL.  Copy the 15 source bytes that
   precede it (two overlapping 8-byte moves) and continue at
   L(CopyFrom1To16Bytes) (outside this view) with %esi = 15, the offset of
   that block, and the mask in %eax.  */
473L(Shl1LoopExit):
474 movlpd (%ecx), %xmm0
475 movlpd %xmm0, (%edx)
476 movlpd 7(%ecx), %xmm0
477 movlpd %xmm0, 7(%edx)
478 mov $15, %esi
479 jmp L(CopyFrom1To16Bytes)
480
/* L(Shl2): destination (%edx) is 16-byte aligned and the source (%ecx) is
   2 bytes past a 16-byte boundary.  Source data is read with aligned loads
   at %ecx - 2 + 16*k and shifted into place with `palignr $2' before each
   aligned 16-byte store.
   Invariants at L(Shl2Start): %xmm0 is all-zero, %xmm1 is the aligned
   block at -2(%ecx), %xmm2 the aligned block at 14(%ecx).  */
481 .p2align 4
482L(Shl2):
483 movaps -2(%ecx), %xmm1
484 movaps 14(%ecx), %xmm2
485L(Shl2Start):
/* Test the newer block for a NUL; keep an unshifted copy in %xmm3.  */
486 pcmpeqb %xmm2, %xmm0
487 pmovmskb %xmm0, %eax
488 movaps %xmm2, %xmm3
489# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
490 sub $16, %ebx
491 jbe L(StrncpyExit2Case2OrCase3)
492# endif
493 test %eax, %eax
494 jnz L(Shl2LoopExit)
495
/* No NUL: %xmm2 = 16 source bytes starting at %ecx; store them and fetch
   the next aligned block.  */
496 palignr $2, %xmm1, %xmm2
497 movaps %xmm3, %xmm1
498 movaps %xmm2, (%edx)
499 movaps 30(%ecx), %xmm2
500
501 pcmpeqb %xmm2, %xmm0
502 lea 16(%edx), %edx
503 pmovmskb %xmm0, %eax
504 lea 16(%ecx), %ecx
505 movaps %xmm2, %xmm3
506# ifdef USE_AS_STRNCPY
507 sub $16, %ebx
508 jbe L(StrncpyExit2Case2OrCase3)
509# endif
510 test %eax, %eax
511 jnz L(Shl2LoopExit)
512
513 palignr $2, %xmm1, %xmm2
514 movaps %xmm2, (%edx)
515 movaps 30(%ecx), %xmm2
516 movaps %xmm3, %xmm1
517
518 pcmpeqb %xmm2, %xmm0
519 lea 16(%edx), %edx
520 pmovmskb %xmm0, %eax
521 lea 16(%ecx), %ecx
522 movaps %xmm2, %xmm3
523# ifdef USE_AS_STRNCPY
524 sub $16, %ebx
525 jbe L(StrncpyExit2Case2OrCase3)
526# endif
527 test %eax, %eax
528 jnz L(Shl2LoopExit)
529
530 palignr $2, %xmm1, %xmm2
531 movaps %xmm2, (%edx)
532 movaps 30(%ecx), %xmm2
533
534 pcmpeqb %xmm2, %xmm0
535 lea 16(%edx), %edx
536 pmovmskb %xmm0, %eax
537 lea 16(%ecx), %ecx
538# ifdef USE_AS_STRNCPY
539 sub $16, %ebx
540 jbe L(StrncpyExit2Case2OrCase3)
541# endif
542 test %eax, %eax
543 jnz L(Shl2LoopExit)
544
/* In this last round the previous block is still in %xmm3.  */
545 palignr $2, %xmm3, %xmm2
546 movaps %xmm2, (%edx)
547 lea 30(%ecx), %ecx
548 lea 16(%edx), %edx
549
/* Move %ecx and %edx back by the same amount (%eax) so that 14(%ecx), the
   first aligned load of the loop below, is on a 64-byte boundary.  */
550 mov %ecx, %eax
551 and $-0x40, %ecx
552 sub %ecx, %eax
553 lea -14(%ecx), %ecx
554 sub %eax, %edx
555# ifdef USE_AS_STRNCPY
556 add %eax, %ebx
557# endif
558 movaps -2(%ecx), %xmm1
559
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   aligned blocks so one pcmpeqb/pmovmskb detects a NUL in any of them.  */
560L(Shl2LoopStart):
561 movaps 14(%ecx), %xmm2
562 movaps 30(%ecx), %xmm3
563 movaps %xmm3, %xmm6
564 movaps 46(%ecx), %xmm4
565 movaps %xmm4, %xmm7
566 movaps 62(%ecx), %xmm5
567 pminub %xmm2, %xmm6
568 pminub %xmm5, %xmm7
569 pminub %xmm6, %xmm7
570 pcmpeqb %xmm0, %xmm7
571 pmovmskb %xmm7, %eax
572 movaps %xmm5, %xmm7
573 palignr $2, %xmm4, %xmm5
/* palignr does not modify EFLAGS, so jnz acts on the test.  A NUL in these
   64 bytes sends them back through the 16-byte steps at L(Shl2Start);
   %xmm1 and %xmm2 still satisfy that label's invariants.  */
574 test %eax, %eax
575 palignr $2, %xmm3, %xmm4
576 jnz L(Shl2Start)
577# ifdef USE_AS_STRNCPY
578 sub $64, %ebx
579 jbe L(StrncpyLeave2)
580# endif
581 palignr $2, %xmm2, %xmm3
582 lea 64(%ecx), %ecx
583 palignr $2, %xmm1, %xmm2
/* The unshifted last block becomes the "previous" block of the next
   iteration.  */
584 movaps %xmm7, %xmm1
585 movaps %xmm5, 48(%edx)
586 movaps %xmm4, 32(%edx)
587 movaps %xmm3, 16(%edx)
588 movaps %xmm2, (%edx)
589 lea 64(%edx), %edx
590 jmp L(Shl2LoopStart)
591
/* The block at 14(%ecx) contains the NUL.  Copy the 14 source bytes that
   precede it (two overlapping 8-byte moves) and continue at
   L(CopyFrom1To16Bytes) (outside this view) with %esi = 14, the offset of
   that block, and the mask in %eax.  */
592L(Shl2LoopExit):
593 movlpd (%ecx), %xmm0
594 movlpd 6(%ecx), %xmm1
595 movlpd %xmm0, (%edx)
596 movlpd %xmm1, 6(%edx)
597 mov $14, %esi
598 jmp L(CopyFrom1To16Bytes)
599
/* L(Shl3): destination (%edx) is 16-byte aligned and the source (%ecx) is
   3 bytes past a 16-byte boundary.  Source data is read with aligned loads
   at %ecx - 3 + 16*k and shifted into place with `palignr $3' before each
   aligned 16-byte store.
   Invariants at L(Shl3Start): %xmm0 is all-zero, %xmm1 is the aligned
   block at -3(%ecx), %xmm2 the aligned block at 13(%ecx).  */
600 .p2align 4
601L(Shl3):
602 movaps -3(%ecx), %xmm1
603 movaps 13(%ecx), %xmm2
604L(Shl3Start):
/* Test the newer block for a NUL; keep an unshifted copy in %xmm3.  */
605 pcmpeqb %xmm2, %xmm0
606 pmovmskb %xmm0, %eax
607 movaps %xmm2, %xmm3
608# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
609 sub $16, %ebx
610 jbe L(StrncpyExit3Case2OrCase3)
611# endif
612 test %eax, %eax
613 jnz L(Shl3LoopExit)
614
/* No NUL: %xmm2 = 16 source bytes starting at %ecx; store them and fetch
   the next aligned block.  */
615 palignr $3, %xmm1, %xmm2
616 movaps %xmm3, %xmm1
617 movaps %xmm2, (%edx)
618 movaps 29(%ecx), %xmm2
619
620 pcmpeqb %xmm2, %xmm0
621 lea 16(%edx), %edx
622 pmovmskb %xmm0, %eax
623 lea 16(%ecx), %ecx
624 movaps %xmm2, %xmm3
625# ifdef USE_AS_STRNCPY
626 sub $16, %ebx
627 jbe L(StrncpyExit3Case2OrCase3)
628# endif
629 test %eax, %eax
630 jnz L(Shl3LoopExit)
631
632 palignr $3, %xmm1, %xmm2
633 movaps %xmm2, (%edx)
634 movaps 29(%ecx), %xmm2
635 movaps %xmm3, %xmm1
636
637 pcmpeqb %xmm2, %xmm0
638 lea 16(%edx), %edx
639 pmovmskb %xmm0, %eax
640 lea 16(%ecx), %ecx
641 movaps %xmm2, %xmm3
642# ifdef USE_AS_STRNCPY
643 sub $16, %ebx
644 jbe L(StrncpyExit3Case2OrCase3)
645# endif
646 test %eax, %eax
647 jnz L(Shl3LoopExit)
648
649 palignr $3, %xmm1, %xmm2
650 movaps %xmm2, (%edx)
651 movaps 29(%ecx), %xmm2
652
653 pcmpeqb %xmm2, %xmm0
654 lea 16(%edx), %edx
655 pmovmskb %xmm0, %eax
656 lea 16(%ecx), %ecx
657# ifdef USE_AS_STRNCPY
658 sub $16, %ebx
659 jbe L(StrncpyExit3Case2OrCase3)
660# endif
661 test %eax, %eax
662 jnz L(Shl3LoopExit)
663
/* In this last round the previous block is still in %xmm3.  */
664 palignr $3, %xmm3, %xmm2
665 movaps %xmm2, (%edx)
666 lea 29(%ecx), %ecx
667 lea 16(%edx), %edx
668
/* Move %ecx and %edx back by the same amount (%eax) so that 13(%ecx), the
   first aligned load of the loop below, is on a 64-byte boundary.  */
669 mov %ecx, %eax
670 and $-0x40, %ecx
671 sub %ecx, %eax
672 lea -13(%ecx), %ecx
673 sub %eax, %edx
674# ifdef USE_AS_STRNCPY
675 add %eax, %ebx
676# endif
677 movaps -3(%ecx), %xmm1
678
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   aligned blocks so one pcmpeqb/pmovmskb detects a NUL in any of them.  */
679L(Shl3LoopStart):
680 movaps 13(%ecx), %xmm2
681 movaps 29(%ecx), %xmm3
682 movaps %xmm3, %xmm6
683 movaps 45(%ecx), %xmm4
684 movaps %xmm4, %xmm7
685 movaps 61(%ecx), %xmm5
686 pminub %xmm2, %xmm6
687 pminub %xmm5, %xmm7
688 pminub %xmm6, %xmm7
689 pcmpeqb %xmm0, %xmm7
690 pmovmskb %xmm7, %eax
691 movaps %xmm5, %xmm7
692 palignr $3, %xmm4, %xmm5
/* palignr does not modify EFLAGS, so jnz acts on the test.  A NUL in these
   64 bytes sends them back through the 16-byte steps at L(Shl3Start);
   %xmm1 and %xmm2 still satisfy that label's invariants.  */
693 test %eax, %eax
694 palignr $3, %xmm3, %xmm4
695 jnz L(Shl3Start)
696# ifdef USE_AS_STRNCPY
697 sub $64, %ebx
698 jbe L(StrncpyLeave3)
699# endif
700 palignr $3, %xmm2, %xmm3
701 lea 64(%ecx), %ecx
702 palignr $3, %xmm1, %xmm2
/* The unshifted last block becomes the "previous" block of the next
   iteration.  */
703 movaps %xmm7, %xmm1
704 movaps %xmm5, 48(%edx)
705 movaps %xmm4, 32(%edx)
706 movaps %xmm3, 16(%edx)
707 movaps %xmm2, (%edx)
708 lea 64(%edx), %edx
709 jmp L(Shl3LoopStart)
710
/* The block at 13(%ecx) contains the NUL.  Copy the 13 source bytes that
   precede it (two overlapping 8-byte moves) and continue at
   L(CopyFrom1To16Bytes) (outside this view) with %esi = 13, the offset of
   that block, and the mask in %eax.  */
711L(Shl3LoopExit):
712 movlpd (%ecx), %xmm0
713 movlpd 5(%ecx), %xmm1
714 movlpd %xmm0, (%edx)
715 movlpd %xmm1, 5(%edx)
716 mov $13, %esi
717 jmp L(CopyFrom1To16Bytes)
718
/* L(Shl4): destination (%edx) is 16-byte aligned and the source (%ecx) is
   4 bytes past a 16-byte boundary.  Source data is read with aligned loads
   at %ecx - 4 + 16*k and shifted into place with `palignr $4' before each
   aligned 16-byte store.
   Invariants at L(Shl4Start): %xmm0 is all-zero, %xmm1 is the aligned
   block at -4(%ecx), %xmm2 the aligned block at 12(%ecx).  */
719 .p2align 4
720L(Shl4):
721 movaps -4(%ecx), %xmm1
722 movaps 12(%ecx), %xmm2
723L(Shl4Start):
/* Test the newer block for a NUL; keep an unshifted copy in %xmm3.  */
724 pcmpeqb %xmm2, %xmm0
725 pmovmskb %xmm0, %eax
726 movaps %xmm2, %xmm3
727# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
728 sub $16, %ebx
729 jbe L(StrncpyExit4Case2OrCase3)
730# endif
731 test %eax, %eax
732 jnz L(Shl4LoopExit)
733
/* No NUL: %xmm2 = 16 source bytes starting at %ecx; store them and fetch
   the next aligned block.  */
734 palignr $4, %xmm1, %xmm2
735 movaps %xmm3, %xmm1
736 movaps %xmm2, (%edx)
737 movaps 28(%ecx), %xmm2
738
739 pcmpeqb %xmm2, %xmm0
740 lea 16(%edx), %edx
741 pmovmskb %xmm0, %eax
742 lea 16(%ecx), %ecx
743 movaps %xmm2, %xmm3
744# ifdef USE_AS_STRNCPY
745 sub $16, %ebx
746 jbe L(StrncpyExit4Case2OrCase3)
747# endif
748 test %eax, %eax
749 jnz L(Shl4LoopExit)
750
751 palignr $4, %xmm1, %xmm2
752 movaps %xmm2, (%edx)
753 movaps 28(%ecx), %xmm2
754 movaps %xmm3, %xmm1
755
756 pcmpeqb %xmm2, %xmm0
757 lea 16(%edx), %edx
758 pmovmskb %xmm0, %eax
759 lea 16(%ecx), %ecx
760 movaps %xmm2, %xmm3
761# ifdef USE_AS_STRNCPY
762 sub $16, %ebx
763 jbe L(StrncpyExit4Case2OrCase3)
764# endif
765 test %eax, %eax
766 jnz L(Shl4LoopExit)
767
768 palignr $4, %xmm1, %xmm2
769 movaps %xmm2, (%edx)
770 movaps 28(%ecx), %xmm2
771
772 pcmpeqb %xmm2, %xmm0
773 lea 16(%edx), %edx
774 pmovmskb %xmm0, %eax
775 lea 16(%ecx), %ecx
776# ifdef USE_AS_STRNCPY
777 sub $16, %ebx
778 jbe L(StrncpyExit4Case2OrCase3)
779# endif
780 test %eax, %eax
781 jnz L(Shl4LoopExit)
782
/* In this last round the previous block is still in %xmm3.  */
783 palignr $4, %xmm3, %xmm2
784 movaps %xmm2, (%edx)
785 lea 28(%ecx), %ecx
786 lea 16(%edx), %edx
787
/* Move %ecx and %edx back by the same amount (%eax) so that 12(%ecx), the
   first aligned load of the loop below, is on a 64-byte boundary.  */
788 mov %ecx, %eax
789 and $-0x40, %ecx
790 sub %ecx, %eax
791 lea -12(%ecx), %ecx
792 sub %eax, %edx
793# ifdef USE_AS_STRNCPY
794 add %eax, %ebx
795# endif
796 movaps -4(%ecx), %xmm1
797
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   aligned blocks so one pcmpeqb/pmovmskb detects a NUL in any of them.  */
798L(Shl4LoopStart):
799 movaps 12(%ecx), %xmm2
800 movaps 28(%ecx), %xmm3
801 movaps %xmm3, %xmm6
802 movaps 44(%ecx), %xmm4
803 movaps %xmm4, %xmm7
804 movaps 60(%ecx), %xmm5
805 pminub %xmm2, %xmm6
806 pminub %xmm5, %xmm7
807 pminub %xmm6, %xmm7
808 pcmpeqb %xmm0, %xmm7
809 pmovmskb %xmm7, %eax
810 movaps %xmm5, %xmm7
811 palignr $4, %xmm4, %xmm5
/* palignr does not modify EFLAGS, so jnz acts on the test.  A NUL in these
   64 bytes sends them back through the 16-byte steps at L(Shl4Start);
   %xmm1 and %xmm2 still satisfy that label's invariants.  */
812 test %eax, %eax
813 palignr $4, %xmm3, %xmm4
814 jnz L(Shl4Start)
815# ifdef USE_AS_STRNCPY
816 sub $64, %ebx
817 jbe L(StrncpyLeave4)
818# endif
819 palignr $4, %xmm2, %xmm3
820 lea 64(%ecx), %ecx
821 palignr $4, %xmm1, %xmm2
/* The unshifted last block becomes the "previous" block of the next
   iteration.  */
822 movaps %xmm7, %xmm1
823 movaps %xmm5, 48(%edx)
824 movaps %xmm4, 32(%edx)
825 movaps %xmm3, 16(%edx)
826 movaps %xmm2, (%edx)
827 lea 64(%edx), %edx
828 jmp L(Shl4LoopStart)
829
/* The block at 12(%ecx) contains the NUL.  Copy the 12 source bytes that
   precede it (an 8-byte move plus a 4-byte move that borrows %esi as
   scratch) and continue at L(CopyFrom1To16Bytes) (outside this view) with
   %esi = 12, the offset of that block, and the mask in %eax.  */
830L(Shl4LoopExit):
831 movlpd (%ecx), %xmm0
832 movl 8(%ecx), %esi
833 movlpd %xmm0, (%edx)
834 movl %esi, 8(%edx)
835 mov $12, %esi
836 jmp L(CopyFrom1To16Bytes)
837
/* L(Shl5): destination (%edx) is 16-byte aligned and the source (%ecx) is
   5 bytes past a 16-byte boundary.  Source data is read with aligned loads
   at %ecx - 5 + 16*k and shifted into place with `palignr $5' before each
   aligned 16-byte store.
   Invariants at L(Shl5Start): %xmm0 is all-zero, %xmm1 is the aligned
   block at -5(%ecx), %xmm2 the aligned block at 11(%ecx).  */
838 .p2align 4
839L(Shl5):
840 movaps -5(%ecx), %xmm1
841 movaps 11(%ecx), %xmm2
842L(Shl5Start):
/* Test the newer block for a NUL; keep an unshifted copy in %xmm3.  */
843 pcmpeqb %xmm2, %xmm0
844 pmovmskb %xmm0, %eax
845 movaps %xmm2, %xmm3
846# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
847 sub $16, %ebx
848 jbe L(StrncpyExit5Case2OrCase3)
849# endif
850 test %eax, %eax
851 jnz L(Shl5LoopExit)
852
/* No NUL: %xmm2 = 16 source bytes starting at %ecx; store them and fetch
   the next aligned block.  */
853 palignr $5, %xmm1, %xmm2
854 movaps %xmm3, %xmm1
855 movaps %xmm2, (%edx)
856 movaps 27(%ecx), %xmm2
857
858 pcmpeqb %xmm2, %xmm0
859 lea 16(%edx), %edx
860 pmovmskb %xmm0, %eax
861 lea 16(%ecx), %ecx
862 movaps %xmm2, %xmm3
863# ifdef USE_AS_STRNCPY
864 sub $16, %ebx
865 jbe L(StrncpyExit5Case2OrCase3)
866# endif
867 test %eax, %eax
868 jnz L(Shl5LoopExit)
869
870 palignr $5, %xmm1, %xmm2
871 movaps %xmm2, (%edx)
872 movaps 27(%ecx), %xmm2
873 movaps %xmm3, %xmm1
874
875 pcmpeqb %xmm2, %xmm0
876 lea 16(%edx), %edx
877 pmovmskb %xmm0, %eax
878 lea 16(%ecx), %ecx
879 movaps %xmm2, %xmm3
880# ifdef USE_AS_STRNCPY
881 sub $16, %ebx
882 jbe L(StrncpyExit5Case2OrCase3)
883# endif
884 test %eax, %eax
885 jnz L(Shl5LoopExit)
886
887 palignr $5, %xmm1, %xmm2
888 movaps %xmm2, (%edx)
889 movaps 27(%ecx), %xmm2
890
891 pcmpeqb %xmm2, %xmm0
892 lea 16(%edx), %edx
893 pmovmskb %xmm0, %eax
894 lea 16(%ecx), %ecx
895# ifdef USE_AS_STRNCPY
896 sub $16, %ebx
897 jbe L(StrncpyExit5Case2OrCase3)
898# endif
899 test %eax, %eax
900 jnz L(Shl5LoopExit)
901
/* In this last round the previous block is still in %xmm3.  */
902 palignr $5, %xmm3, %xmm2
903 movaps %xmm2, (%edx)
904 lea 27(%ecx), %ecx
905 lea 16(%edx), %edx
906
/* Move %ecx and %edx back by the same amount (%eax) so that 11(%ecx), the
   first aligned load of the loop below, is on a 64-byte boundary.  */
907 mov %ecx, %eax
908 and $-0x40, %ecx
909 sub %ecx, %eax
910 lea -11(%ecx), %ecx
911 sub %eax, %edx
912# ifdef USE_AS_STRNCPY
913 add %eax, %ebx
914# endif
915 movaps -5(%ecx), %xmm1
916
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   aligned blocks so one pcmpeqb/pmovmskb detects a NUL in any of them.  */
917L(Shl5LoopStart):
918 movaps 11(%ecx), %xmm2
919 movaps 27(%ecx), %xmm3
920 movaps %xmm3, %xmm6
921 movaps 43(%ecx), %xmm4
922 movaps %xmm4, %xmm7
923 movaps 59(%ecx), %xmm5
924 pminub %xmm2, %xmm6
925 pminub %xmm5, %xmm7
926 pminub %xmm6, %xmm7
927 pcmpeqb %xmm0, %xmm7
928 pmovmskb %xmm7, %eax
929 movaps %xmm5, %xmm7
930 palignr $5, %xmm4, %xmm5
/* palignr does not modify EFLAGS, so jnz acts on the test.  A NUL in these
   64 bytes sends them back through the 16-byte steps at L(Shl5Start);
   %xmm1 and %xmm2 still satisfy that label's invariants.  */
931 test %eax, %eax
932 palignr $5, %xmm3, %xmm4
933 jnz L(Shl5Start)
934# ifdef USE_AS_STRNCPY
935 sub $64, %ebx
936 jbe L(StrncpyLeave5)
937# endif
938 palignr $5, %xmm2, %xmm3
939 lea 64(%ecx), %ecx
940 palignr $5, %xmm1, %xmm2
/* The unshifted last block becomes the "previous" block of the next
   iteration.  */
941 movaps %xmm7, %xmm1
942 movaps %xmm5, 48(%edx)
943 movaps %xmm4, 32(%edx)
944 movaps %xmm3, 16(%edx)
945 movaps %xmm2, (%edx)
946 lea 64(%edx), %edx
947 jmp L(Shl5LoopStart)
948
/* The block at 11(%ecx) contains the NUL.  Copy the 11 source bytes that
   precede it (an 8-byte move plus an overlapping 4-byte move that borrows
   %esi as scratch) and continue at L(CopyFrom1To16Bytes) (outside this
   view) with %esi = 11, the offset of that block, and the mask in %eax.  */
949L(Shl5LoopExit):
950 movlpd (%ecx), %xmm0
951 movl 7(%ecx), %esi
952 movlpd %xmm0, (%edx)
953 movl %esi, 7(%edx)
954 mov $11, %esi
955 jmp L(CopyFrom1To16Bytes)
956
/* L(Shl6): destination (%edx) is 16-byte aligned and the source (%ecx) is
   6 bytes past a 16-byte boundary.  Source data is read with aligned loads
   at %ecx - 6 + 16*k and shifted into place with `palignr $6' before each
   aligned 16-byte store.
   Invariants at L(Shl6Start): %xmm0 is all-zero, %xmm1 is the aligned
   block at -6(%ecx), %xmm2 the aligned block at 10(%ecx).  */
957 .p2align 4
958L(Shl6):
959 movaps -6(%ecx), %xmm1
960 movaps 10(%ecx), %xmm2
961L(Shl6Start):
/* Test the newer block for a NUL; keep an unshifted copy in %xmm3.  */
962 pcmpeqb %xmm2, %xmm0
963 pmovmskb %xmm0, %eax
964 movaps %xmm2, %xmm3
965# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
966 sub $16, %ebx
967 jbe L(StrncpyExit6Case2OrCase3)
968# endif
969 test %eax, %eax
970 jnz L(Shl6LoopExit)
971
/* No NUL: %xmm2 = 16 source bytes starting at %ecx; store them and fetch
   the next aligned block.  */
972 palignr $6, %xmm1, %xmm2
973 movaps %xmm3, %xmm1
974 movaps %xmm2, (%edx)
975 movaps 26(%ecx), %xmm2
976
977 pcmpeqb %xmm2, %xmm0
978 lea 16(%edx), %edx
979 pmovmskb %xmm0, %eax
980 lea 16(%ecx), %ecx
981 movaps %xmm2, %xmm3
982# ifdef USE_AS_STRNCPY
983 sub $16, %ebx
984 jbe L(StrncpyExit6Case2OrCase3)
985# endif
986 test %eax, %eax
987 jnz L(Shl6LoopExit)
988
989 palignr $6, %xmm1, %xmm2
990 movaps %xmm2, (%edx)
991 movaps 26(%ecx), %xmm2
992 movaps %xmm3, %xmm1
993
994 pcmpeqb %xmm2, %xmm0
995 lea 16(%edx), %edx
996 pmovmskb %xmm0, %eax
997 lea 16(%ecx), %ecx
998 movaps %xmm2, %xmm3
999# ifdef USE_AS_STRNCPY
1000 sub $16, %ebx
1001 jbe L(StrncpyExit6Case2OrCase3)
1002# endif
1003 test %eax, %eax
1004 jnz L(Shl6LoopExit)
1005
1006 palignr $6, %xmm1, %xmm2
1007 movaps %xmm2, (%edx)
1008 movaps 26(%ecx), %xmm2
1009
1010 pcmpeqb %xmm2, %xmm0
1011 lea 16(%edx), %edx
1012 pmovmskb %xmm0, %eax
1013 lea 16(%ecx), %ecx
1014# ifdef USE_AS_STRNCPY
1015 sub $16, %ebx
1016 jbe L(StrncpyExit6Case2OrCase3)
1017# endif
1018 test %eax, %eax
1019 jnz L(Shl6LoopExit)
1020
/* In this last round the previous block is still in %xmm3.  */
1021 palignr $6, %xmm3, %xmm2
1022 movaps %xmm2, (%edx)
1023 lea 26(%ecx), %ecx
1024 lea 16(%edx), %edx
1025
/* Move %ecx and %edx back by the same amount (%eax) so that 10(%ecx), the
   first aligned load of the loop below, is on a 64-byte boundary.  */
1026 mov %ecx, %eax
1027 and $-0x40, %ecx
1028 sub %ecx, %eax
1029 lea -10(%ecx), %ecx
1030 sub %eax, %edx
1031# ifdef USE_AS_STRNCPY
1032 add %eax, %ebx
1033# endif
1034 movaps -6(%ecx), %xmm1
1035
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   aligned blocks so one pcmpeqb/pmovmskb detects a NUL in any of them.  */
1036L(Shl6LoopStart):
1037 movaps 10(%ecx), %xmm2
1038 movaps 26(%ecx), %xmm3
1039 movaps %xmm3, %xmm6
1040 movaps 42(%ecx), %xmm4
1041 movaps %xmm4, %xmm7
1042 movaps 58(%ecx), %xmm5
1043 pminub %xmm2, %xmm6
1044 pminub %xmm5, %xmm7
1045 pminub %xmm6, %xmm7
1046 pcmpeqb %xmm0, %xmm7
1047 pmovmskb %xmm7, %eax
1048 movaps %xmm5, %xmm7
1049 palignr $6, %xmm4, %xmm5
/* palignr does not modify EFLAGS, so jnz acts on the test.  A NUL in these
   64 bytes sends them back through the 16-byte steps at L(Shl6Start);
   %xmm1 and %xmm2 still satisfy that label's invariants.  */
1050 test %eax, %eax
1051 palignr $6, %xmm3, %xmm4
1052 jnz L(Shl6Start)
1053# ifdef USE_AS_STRNCPY
1054 sub $64, %ebx
1055 jbe L(StrncpyLeave6)
1056# endif
1057 palignr $6, %xmm2, %xmm3
1058 lea 64(%ecx), %ecx
1059 palignr $6, %xmm1, %xmm2
/* The unshifted last block becomes the "previous" block of the next
   iteration.  */
1060 movaps %xmm7, %xmm1
1061 movaps %xmm5, 48(%edx)
1062 movaps %xmm4, 32(%edx)
1063 movaps %xmm3, 16(%edx)
1064 movaps %xmm2, (%edx)
1065 lea 64(%edx), %edx
1066 jmp L(Shl6LoopStart)
1067
/* The block at 10(%ecx) contains the NUL.  Copy the 10 source bytes that
   precede it (an 8-byte move plus an overlapping 4-byte move that borrows
   %esi as scratch) and continue at L(CopyFrom1To16Bytes) (outside this
   view) with %esi = 10, the offset of that block, and the mask in %eax.  */
1068L(Shl6LoopExit):
1069 movlpd (%ecx), %xmm0
1070 movl 6(%ecx), %esi
1071 movlpd %xmm0, (%edx)
1072 movl %esi, 6(%edx)
1073 mov $10, %esi
1074 jmp L(CopyFrom1To16Bytes)
1075
/* L(Shl7): destination (%edx) is 16-byte aligned and the source (%ecx) is
   7 bytes past a 16-byte boundary.  Source data is read with aligned loads
   at %ecx - 7 + 16*k and shifted into place with `palignr $7' before each
   aligned 16-byte store.
   Invariants at L(Shl7Start): %xmm0 is all-zero, %xmm1 is the aligned
   block at -7(%ecx), %xmm2 the aligned block at 9(%ecx).  */
1076 .p2align 4
1077L(Shl7):
1078 movaps -7(%ecx), %xmm1
1079 movaps 9(%ecx), %xmm2
1080L(Shl7Start):
/* Test the newer block for a NUL; keep an unshifted copy in %xmm3.  */
1081 pcmpeqb %xmm2, %xmm0
1082 pmovmskb %xmm0, %eax
1083 movaps %xmm2, %xmm3
1084# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
1085 sub $16, %ebx
1086 jbe L(StrncpyExit7Case2OrCase3)
1087# endif
1088 test %eax, %eax
1089 jnz L(Shl7LoopExit)
1090
/* No NUL: %xmm2 = 16 source bytes starting at %ecx; store them and fetch
   the next aligned block.  */
1091 palignr $7, %xmm1, %xmm2
1092 movaps %xmm3, %xmm1
1093 movaps %xmm2, (%edx)
1094 movaps 25(%ecx), %xmm2
1095
1096 pcmpeqb %xmm2, %xmm0
1097 lea 16(%edx), %edx
1098 pmovmskb %xmm0, %eax
1099 lea 16(%ecx), %ecx
1100 movaps %xmm2, %xmm3
1101# ifdef USE_AS_STRNCPY
1102 sub $16, %ebx
1103 jbe L(StrncpyExit7Case2OrCase3)
1104# endif
1105 test %eax, %eax
1106 jnz L(Shl7LoopExit)
1107
1108 palignr $7, %xmm1, %xmm2
1109 movaps %xmm2, (%edx)
1110 movaps 25(%ecx), %xmm2
1111 movaps %xmm3, %xmm1
1112
1113 pcmpeqb %xmm2, %xmm0
1114 lea 16(%edx), %edx
1115 pmovmskb %xmm0, %eax
1116 lea 16(%ecx), %ecx
1117 movaps %xmm2, %xmm3
1118# ifdef USE_AS_STRNCPY
1119 sub $16, %ebx
1120 jbe L(StrncpyExit7Case2OrCase3)
1121# endif
1122 test %eax, %eax
1123 jnz L(Shl7LoopExit)
1124
1125 palignr $7, %xmm1, %xmm2
1126 movaps %xmm2, (%edx)
1127 movaps 25(%ecx), %xmm2
1128
1129 pcmpeqb %xmm2, %xmm0
1130 lea 16(%edx), %edx
1131 pmovmskb %xmm0, %eax
1132 lea 16(%ecx), %ecx
1133# ifdef USE_AS_STRNCPY
1134 sub $16, %ebx
1135 jbe L(StrncpyExit7Case2OrCase3)
1136# endif
1137 test %eax, %eax
1138 jnz L(Shl7LoopExit)
1139
/* In this last round the previous block is still in %xmm3.  */
1140 palignr $7, %xmm3, %xmm2
1141 movaps %xmm2, (%edx)
1142 lea 25(%ecx), %ecx
1143 lea 16(%edx), %edx
1144
/* Move %ecx and %edx back by the same amount (%eax) so that 9(%ecx), the
   first aligned load of the loop below, is on a 64-byte boundary.  */
1145 mov %ecx, %eax
1146 and $-0x40, %ecx
1147 sub %ecx, %eax
1148 lea -9(%ecx), %ecx
1149 sub %eax, %edx
1150# ifdef USE_AS_STRNCPY
1151 add %eax, %ebx
1152# endif
1153 movaps -7(%ecx), %xmm1
1154
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   aligned blocks so one pcmpeqb/pmovmskb detects a NUL in any of them.  */
1155L(Shl7LoopStart):
1156 movaps 9(%ecx), %xmm2
1157 movaps 25(%ecx), %xmm3
1158 movaps %xmm3, %xmm6
1159 movaps 41(%ecx), %xmm4
1160 movaps %xmm4, %xmm7
1161 movaps 57(%ecx), %xmm5
1162 pminub %xmm2, %xmm6
1163 pminub %xmm5, %xmm7
1164 pminub %xmm6, %xmm7
1165 pcmpeqb %xmm0, %xmm7
1166 pmovmskb %xmm7, %eax
1167 movaps %xmm5, %xmm7
1168 palignr $7, %xmm4, %xmm5
/* palignr does not modify EFLAGS, so jnz acts on the test.  A NUL in these
   64 bytes sends them back through the 16-byte steps at L(Shl7Start);
   %xmm1 and %xmm2 still satisfy that label's invariants.  */
1169 test %eax, %eax
1170 palignr $7, %xmm3, %xmm4
1171 jnz L(Shl7Start)
1172# ifdef USE_AS_STRNCPY
1173 sub $64, %ebx
1174 jbe L(StrncpyLeave7)
1175# endif
1176 palignr $7, %xmm2, %xmm3
1177 lea 64(%ecx), %ecx
1178 palignr $7, %xmm1, %xmm2
/* The unshifted last block becomes the "previous" block of the next
   iteration.  */
1179 movaps %xmm7, %xmm1
1180 movaps %xmm5, 48(%edx)
1181 movaps %xmm4, 32(%edx)
1182 movaps %xmm3, 16(%edx)
1183 movaps %xmm2, (%edx)
1184 lea 64(%edx), %edx
1185 jmp L(Shl7LoopStart)
1186
/* The block at 9(%ecx) contains the NUL.  Copy the 9 source bytes that
   precede it (an 8-byte move plus an overlapping 4-byte move that borrows
   %esi as scratch) and continue at L(CopyFrom1To16Bytes) (outside this
   view) with %esi = 9, the offset of that block, and the mask in %eax.  */
1187L(Shl7LoopExit):
1188 movlpd (%ecx), %xmm0
1189 movl 5(%ecx), %esi
1190 movlpd %xmm0, (%edx)
1191 movl %esi, 5(%edx)
1192 mov $9, %esi
1193 jmp L(CopyFrom1To16Bytes)
1194
/* L(Shl8): destination (%edx) is 16-byte aligned and the source (%ecx) is
   8 bytes past a 16-byte boundary.  Source data is read with aligned loads
   at %ecx - 8 + 16*k and shifted into place with `palignr $8' before each
   aligned 16-byte store.
   Invariants at L(Shl8Start): %xmm0 is all-zero, %xmm1 is the aligned
   block at -8(%ecx), %xmm2 the aligned block at 8(%ecx).  */
1195 .p2align 4
1196L(Shl8):
1197 movaps -8(%ecx), %xmm1
1198 movaps 8(%ecx), %xmm2
1199L(Shl8Start):
/* Test the newer block for a NUL; keep an unshifted copy in %xmm3.  */
1200 pcmpeqb %xmm2, %xmm0
1201 pmovmskb %xmm0, %eax
1202 movaps %xmm2, %xmm3
1203# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
1204 sub $16, %ebx
1205 jbe L(StrncpyExit8Case2OrCase3)
1206# endif
1207 test %eax, %eax
1208 jnz L(Shl8LoopExit)
1209
/* No NUL: %xmm2 = 16 source bytes starting at %ecx; store them and fetch
   the next aligned block.  */
1210 palignr $8, %xmm1, %xmm2
1211 movaps %xmm3, %xmm1
1212 movaps %xmm2, (%edx)
1213 movaps 24(%ecx), %xmm2
1214
1215 pcmpeqb %xmm2, %xmm0
1216 lea 16(%edx), %edx
1217 pmovmskb %xmm0, %eax
1218 lea 16(%ecx), %ecx
1219 movaps %xmm2, %xmm3
1220# ifdef USE_AS_STRNCPY
1221 sub $16, %ebx
1222 jbe L(StrncpyExit8Case2OrCase3)
1223# endif
1224 test %eax, %eax
1225 jnz L(Shl8LoopExit)
1226
1227 palignr $8, %xmm1, %xmm2
1228 movaps %xmm2, (%edx)
1229 movaps 24(%ecx), %xmm2
1230 movaps %xmm3, %xmm1
1231
1232 pcmpeqb %xmm2, %xmm0
1233 lea 16(%edx), %edx
1234 pmovmskb %xmm0, %eax
1235 lea 16(%ecx), %ecx
1236 movaps %xmm2, %xmm3
1237# ifdef USE_AS_STRNCPY
1238 sub $16, %ebx
1239 jbe L(StrncpyExit8Case2OrCase3)
1240# endif
1241 test %eax, %eax
1242 jnz L(Shl8LoopExit)
1243
1244 palignr $8, %xmm1, %xmm2
1245 movaps %xmm2, (%edx)
1246 movaps 24(%ecx), %xmm2
1247
1248 pcmpeqb %xmm2, %xmm0
1249 lea 16(%edx), %edx
1250 pmovmskb %xmm0, %eax
1251 lea 16(%ecx), %ecx
1252# ifdef USE_AS_STRNCPY
1253 sub $16, %ebx
1254 jbe L(StrncpyExit8Case2OrCase3)
1255# endif
1256 test %eax, %eax
1257 jnz L(Shl8LoopExit)
1258
/* In this last round the previous block is still in %xmm3.  */
1259 palignr $8, %xmm3, %xmm2
1260 movaps %xmm2, (%edx)
1261 lea 24(%ecx), %ecx
1262 lea 16(%edx), %edx
1263
/* Move %ecx and %edx back by the same amount (%eax) so that 8(%ecx), the
   first aligned load of the loop below, is on a 64-byte boundary.  */
1264 mov %ecx, %eax
1265 and $-0x40, %ecx
1266 sub %ecx, %eax
1267 lea -8(%ecx), %ecx
1268 sub %eax, %edx
1269# ifdef USE_AS_STRNCPY
1270 add %eax, %ebx
1271# endif
1272 movaps -8(%ecx), %xmm1
1273
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   aligned blocks so one pcmpeqb/pmovmskb detects a NUL in any of them.  */
1274L(Shl8LoopStart):
1275 movaps 8(%ecx), %xmm2
1276 movaps 24(%ecx), %xmm3
1277 movaps %xmm3, %xmm6
1278 movaps 40(%ecx), %xmm4
1279 movaps %xmm4, %xmm7
1280 movaps 56(%ecx), %xmm5
1281 pminub %xmm2, %xmm6
1282 pminub %xmm5, %xmm7
1283 pminub %xmm6, %xmm7
1284 pcmpeqb %xmm0, %xmm7
1285 pmovmskb %xmm7, %eax
1286 movaps %xmm5, %xmm7
1287 palignr $8, %xmm4, %xmm5
/* palignr does not modify EFLAGS, so jnz acts on the test.  A NUL in these
   64 bytes sends them back through the 16-byte steps at L(Shl8Start);
   %xmm1 and %xmm2 still satisfy that label's invariants.  */
1288 test %eax, %eax
1289 palignr $8, %xmm3, %xmm4
1290 jnz L(Shl8Start)
1291# ifdef USE_AS_STRNCPY
1292 sub $64, %ebx
1293 jbe L(StrncpyLeave8)
1294# endif
1295 palignr $8, %xmm2, %xmm3
1296 lea 64(%ecx), %ecx
1297 palignr $8, %xmm1, %xmm2
/* The unshifted last block becomes the "previous" block of the next
   iteration.  */
1298 movaps %xmm7, %xmm1
1299 movaps %xmm5, 48(%edx)
1300 movaps %xmm4, 32(%edx)
1301 movaps %xmm3, 16(%edx)
1302 movaps %xmm2, (%edx)
1303 lea 64(%edx), %edx
1304 jmp L(Shl8LoopStart)
1305
/* The block at 8(%ecx) contains the NUL.  Copy the 8 source bytes that
   precede it with a single 8-byte move and continue at
   L(CopyFrom1To16Bytes) (outside this view) with %esi = 8, the offset of
   that block, and the mask in %eax.  */
1306L(Shl8LoopExit):
1307 movlpd (%ecx), %xmm0
1308 movlpd %xmm0, (%edx)
1309 mov $8, %esi
1310 jmp L(CopyFrom1To16Bytes)
1311
/* L(Shl9): destination (%edx) is 16-byte aligned and the source (%ecx) is
   9 bytes past a 16-byte boundary.  Source data is read with aligned loads
   at %ecx - 9 + 16*k and shifted into place with `palignr $9' before each
   aligned 16-byte store.
   Invariants at L(Shl9Start): %xmm0 is all-zero, %xmm1 is the aligned
   block at -9(%ecx), %xmm2 the aligned block at 7(%ecx).  */
1312 .p2align 4
1313L(Shl9):
1314 movaps -9(%ecx), %xmm1
1315 movaps 7(%ecx), %xmm2
1316L(Shl9Start):
/* Test the newer block for a NUL; keep an unshifted copy in %xmm3.  */
1317 pcmpeqb %xmm2, %xmm0
1318 pmovmskb %xmm0, %eax
1319 movaps %xmm2, %xmm3
1320# ifdef USE_AS_STRNCPY
/* strncpy only: take 16 off %ebx; leave when it reaches zero or wraps.  */
1321 sub $16, %ebx
1322 jbe L(StrncpyExit9Case2OrCase3)
1323# endif
1324 test %eax, %eax
1325 jnz L(Shl9LoopExit)
1326
/* No NUL: %xmm2 = 16 source bytes starting at %ecx; store them and fetch
   the next aligned block.  */
1327 palignr $9, %xmm1, %xmm2
1328 movaps %xmm3, %xmm1
1329 movaps %xmm2, (%edx)
1330 movaps 23(%ecx), %xmm2
1331
1332 pcmpeqb %xmm2, %xmm0
1333 lea 16(%edx), %edx
1334 pmovmskb %xmm0, %eax
1335 lea 16(%ecx), %ecx
1336 movaps %xmm2, %xmm3
1337# ifdef USE_AS_STRNCPY
1338 sub $16, %ebx
1339 jbe L(StrncpyExit9Case2OrCase3)
1340# endif
1341 test %eax, %eax
1342 jnz L(Shl9LoopExit)
1343
1344 palignr $9, %xmm1, %xmm2
1345 movaps %xmm2, (%edx)
1346 movaps 23(%ecx), %xmm2
1347 movaps %xmm3, %xmm1
1348
1349 pcmpeqb %xmm2, %xmm0
1350 lea 16(%edx), %edx
1351 pmovmskb %xmm0, %eax
1352 lea 16(%ecx), %ecx
1353 movaps %xmm2, %xmm3
1354# ifdef USE_AS_STRNCPY
1355 sub $16, %ebx
1356 jbe L(StrncpyExit9Case2OrCase3)
1357# endif
1358 test %eax, %eax
1359 jnz L(Shl9LoopExit)
1360
1361 palignr $9, %xmm1, %xmm2
1362 movaps %xmm2, (%edx)
1363 movaps 23(%ecx), %xmm2
1364
1365 pcmpeqb %xmm2, %xmm0
1366 lea 16(%edx), %edx
1367 pmovmskb %xmm0, %eax
1368 lea 16(%ecx), %ecx
1369# ifdef USE_AS_STRNCPY
1370 sub $16, %ebx
1371 jbe L(StrncpyExit9Case2OrCase3)
1372# endif
1373 test %eax, %eax
1374 jnz L(Shl9LoopExit)
1375
/* In this last round the previous block is still in %xmm3.  */
1376 palignr $9, %xmm3, %xmm2
1377 movaps %xmm2, (%edx)
1378 lea 23(%ecx), %ecx
1379 lea 16(%edx), %edx
1380
/* Move %ecx and %edx back by the same amount (%eax) so that 7(%ecx), the
   first aligned load of the loop below, is on a 64-byte boundary.  */
1381 mov %ecx, %eax
1382 and $-0x40, %ecx
1383 sub %ecx, %eax
1384 lea -7(%ecx), %ecx
1385 sub %eax, %edx
1386# ifdef USE_AS_STRNCPY
1387 add %eax, %ebx
1388# endif
1389 movaps -9(%ecx), %xmm1
1390
/* Main loop: 64 source bytes per iteration.  pminub folds the four
   aligned blocks so one pcmpeqb/pmovmskb detects a NUL in any of them.  */
1391L(Shl9LoopStart):
1392 movaps 7(%ecx), %xmm2
1393 movaps 23(%ecx), %xmm3
1394 movaps %xmm3, %xmm6
1395 movaps 39(%ecx), %xmm4
1396 movaps %xmm4, %xmm7
1397 movaps 55(%ecx), %xmm5
1398 pminub %xmm2, %xmm6
1399 pminub %xmm5, %xmm7
1400 pminub %xmm6, %xmm7
1401 pcmpeqb %xmm0, %xmm7
1402 pmovmskb %xmm7, %eax
1403 movaps %xmm5, %xmm7
1404 palignr $9, %xmm4, %xmm5
/* palignr does not modify EFLAGS, so jnz acts on the test.  A NUL in these
   64 bytes sends them back through the 16-byte steps at L(Shl9Start);
   %xmm1 and %xmm2 still satisfy that label's invariants.  */
1405 test %eax, %eax
1406 palignr $9, %xmm3, %xmm4
1407 jnz L(Shl9Start)
1408# ifdef USE_AS_STRNCPY
1409 sub $64, %ebx
1410 jbe L(StrncpyLeave9)
1411# endif
1412 palignr $9, %xmm2, %xmm3
1413 lea 64(%ecx), %ecx
1414 palignr $9, %xmm1, %xmm2
/* The unshifted last block becomes the "previous" block of the next
   iteration.  */
1415 movaps %xmm7, %xmm1
1416 movaps %xmm5, 48(%edx)
1417 movaps %xmm4, 32(%edx)
1418 movaps %xmm3, 16(%edx)
1419 movaps %xmm2, (%edx)
1420 lea 64(%edx), %edx
1421 jmp L(Shl9LoopStart)
1422
/* The block at 7(%ecx) contains the NUL.  Only 7 source bytes precede it,
   so the 8-byte move starts one byte early, at -1(%ecx)/-1(%edx); that
   extra byte lies before the current copy position -- presumably already
   copied, TODO confirm against the entry paths.  Continue at
   L(CopyFrom1To16Bytes) (outside this view) with %esi = 7, the offset of
   that block, and the mask in %eax.  */
1423L(Shl9LoopExit):
1424 movlpd -1(%ecx), %xmm0
1425 movlpd %xmm0, -1(%edx)
1426 mov $7, %esi
1427 jmp L(CopyFrom1To16Bytes)
1428
/* Shl10: copy path for a source pointer that is 10 bytes past a 16-byte
   boundary.  Every load is a movaps at -10(%ecx) + 16*k and every store
   a movaps to %edx, so both of those addresses must be 16-byte aligned.
   palignr $10 joins two consecutive aligned source vectors into the 16
   bytes that start at the current %ecx.
   Registers: %ecx = source, %edx = destination, %eax = mask of zero
   bytes in the newest aligned vector, %ebx = remaining length budget
   (strncpy builds only), %xmm1 = previous aligned vector, %xmm2 = newest.
   NOTE(review): %xmm0 is assumed to be all-zero on entry; pcmpeqb writes
   its result into %xmm0, which stays zero as long as no zero byte is
   found.  Confirm against the code that dispatches here.  */
1429 .p2align 4
1430L(Shl10):
1431 movaps -10(%ecx), %xmm1
1432 movaps 6(%ecx), %xmm2
/* Shl10Start is also the re-entry point used by Shl10LoopStart when the
   64-byte loop detects a zero byte.  Step 1 of 4.  */
1433L(Shl10Start):
1434 pcmpeqb %xmm2, %xmm0
1435 pmovmskb %xmm0, %eax
1436 movaps %xmm2, %xmm3
1437# ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish on the strncpy exit path.  */
1438 sub $16, %ebx
1439 jbe L(StrncpyExit10Case2OrCase3)
1440# endif
1441 test %eax, %eax
1442 jnz L(Shl10LoopExit)
1443
/* No zero byte: emit 16 bytes, remember the unshifted vector in %xmm1
   and fetch the next aligned vector.  */
1444 palignr $10, %xmm1, %xmm2
1445 movaps %xmm3, %xmm1
1446 movaps %xmm2, (%edx)
1447 movaps 22(%ecx), %xmm2
1448
/* Step 2 of 4 (pointer updates use lea, interleaved with the compare).  */
1449 pcmpeqb %xmm2, %xmm0
1450 lea 16(%edx), %edx
1451 pmovmskb %xmm0, %eax
1452 lea 16(%ecx), %ecx
1453 movaps %xmm2, %xmm3
1454# ifdef USE_AS_STRNCPY
1455 sub $16, %ebx
1456 jbe L(StrncpyExit10Case2OrCase3)
1457# endif
1458 test %eax, %eax
1459 jnz L(Shl10LoopExit)
1460
1461 palignr $10, %xmm1, %xmm2
1462 movaps %xmm2, (%edx)
1463 movaps 22(%ecx), %xmm2
1464 movaps %xmm3, %xmm1
1465
/* Step 3 of 4.  */
1466 pcmpeqb %xmm2, %xmm0
1467 lea 16(%edx), %edx
1468 pmovmskb %xmm0, %eax
1469 lea 16(%ecx), %ecx
1470 movaps %xmm2, %xmm3
1471# ifdef USE_AS_STRNCPY
1472 sub $16, %ebx
1473 jbe L(StrncpyExit10Case2OrCase3)
1474# endif
1475 test %eax, %eax
1476 jnz L(Shl10LoopExit)
1477
1478 palignr $10, %xmm1, %xmm2
1479 movaps %xmm2, (%edx)
1480 movaps 22(%ecx), %xmm2
1481
/* Step 4 of 4: the previous vector is still in %xmm3 (no copy to %xmm1
   was made in step 3), so the final palignr uses %xmm3.  */
1482 pcmpeqb %xmm2, %xmm0
1483 lea 16(%edx), %edx
1484 pmovmskb %xmm0, %eax
1485 lea 16(%ecx), %ecx
1486# ifdef USE_AS_STRNCPY
1487 sub $16, %ebx
1488 jbe L(StrncpyExit10Case2OrCase3)
1489# endif
1490 test %eax, %eax
1491 jnz L(Shl10LoopExit)
1492
1493 palignr $10, %xmm3, %xmm2
1494 movaps %xmm2, (%edx)
/* %ecx + 22 is the first aligned source vector not loaded yet.  */
1495 lea 22(%ecx), %ecx
1496 lea 16(%edx), %edx
1497
/* Round that address down to a 64-byte boundary for the main loop.
   %eax = number of bytes stepped back; %edx is moved back by the same
   amount (and, for strncpy, the bytes are returned to the budget), so
   the loop re-copies that overlap.  Afterwards %ecx + 6 is 64-byte
   aligned and %xmm1 is reloaded with the vector in front of it.  */
1498 mov %ecx, %eax
1499 and $-0x40, %ecx
1500 sub %ecx, %eax
1501 lea -6(%ecx), %ecx
1502 sub %eax, %edx
1503# ifdef USE_AS_STRNCPY
1504 add %eax, %ebx
1505# endif
1506 movaps -10(%ecx), %xmm1
1507
/* Shl10LoopStart: main loop of the shift-by-10 path, 64 source bytes per
   iteration.  Loads are movaps at 6/22/38/54(%ecx), so %ecx + 6 must be
   16-byte aligned (the setup code just above rounds it down to a 64-byte
   boundary).  %xmm1 holds the aligned vector that precedes 6(%ecx).
   NOTE(review): %xmm0 is assumed to be all-zero here -- confirm.  */
1508L(Shl10LoopStart):
1509 movaps 6(%ecx), %xmm2
1510 movaps 22(%ecx), %xmm3
1511 movaps %xmm3, %xmm6
1512 movaps 38(%ecx), %xmm4
1513 movaps %xmm4, %xmm7
1514 movaps 54(%ecx), %xmm5
/* Unsigned byte-min of the four vectors: a zero byte in any of them
   leaves a zero byte in %xmm7, so one compare covers all 64 bytes.  */
1515 pminub %xmm2, %xmm6
1516 pminub %xmm5, %xmm7
1517 pminub %xmm6, %xmm7
1518 pcmpeqb %xmm0, %xmm7
1519 pmovmskb %xmm7, %eax
/* Keep the unshifted last vector; it becomes %xmm1 of the next round.  */
1520 movaps %xmm5, %xmm7
/* palignr does not modify EFLAGS, so it may sit between test and jnz.  */
1521 palignr $10, %xmm4, %xmm5
1522 test %eax, %eax
1523 palignr $10, %xmm3, %xmm4
/* A zero byte is somewhere in this group: redo it 16 bytes at a time
   from Shl10Start, which starts from %xmm1/%xmm2 (both still intact).  */
1524 jnz L(Shl10Start)
1525# ifdef USE_AS_STRNCPY
/* Leave the loop when 64 or fewer bytes of the length budget remain.  */
1526 sub $64, %ebx
1527 jbe L(StrncpyLeave10)
1528# endif
1529 palignr $10, %xmm2, %xmm3
1530 lea 64(%ecx), %ecx
1531 palignr $10, %xmm1, %xmm2
1532 movaps %xmm7, %xmm1
/* Store the four realigned vectors and advance the destination.  */
1533 movaps %xmm5, 48(%edx)
1534 movaps %xmm4, 32(%edx)
1535 movaps %xmm3, 16(%edx)
1536 movaps %xmm2, (%edx)
1537 lea 64(%edx), %edx
1538 jmp L(Shl10LoopStart)
1539
/* Shl10LoopExit: a zero byte was found in the aligned vector at 6(%ecx)
   (mask in %eax).  Copy the 8 bytes at -2(%ecx)..5(%ecx), which covers
   the 6 source bytes in front of that vector, then hand over to
   CopyFrom1To16Bytes, which advances %ecx/%edx by %esi = 6 and finishes
   from the mask.  */
1540L(Shl10LoopExit):
1541 movlpd -2(%ecx), %xmm0
1542 movlpd %xmm0, -2(%edx)
1543 mov $6, %esi
1544 jmp L(CopyFrom1To16Bytes)
1545
/* Shl11: copy path for a source pointer that is 11 bytes past a 16-byte
   boundary.  Every load is a movaps at -11(%ecx) + 16*k and every store
   a movaps to %edx, so both of those addresses must be 16-byte aligned.
   palignr $11 joins two consecutive aligned source vectors into the 16
   bytes that start at the current %ecx.
   Registers: %ecx = source, %edx = destination, %eax = mask of zero
   bytes in the newest aligned vector, %ebx = remaining length budget
   (strncpy builds only), %xmm1 = previous aligned vector, %xmm2 = newest.
   NOTE(review): %xmm0 is assumed to be all-zero on entry; pcmpeqb writes
   its result into %xmm0, which stays zero as long as no zero byte is
   found.  Confirm against the code that dispatches here.  */
1546 .p2align 4
1547L(Shl11):
1548 movaps -11(%ecx), %xmm1
1549 movaps 5(%ecx), %xmm2
/* Shl11Start is also the re-entry point used by Shl11LoopStart when the
   64-byte loop detects a zero byte.  Step 1 of 4.  */
1550L(Shl11Start):
1551 pcmpeqb %xmm2, %xmm0
1552 pmovmskb %xmm0, %eax
1553 movaps %xmm2, %xmm3
1554# ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish on the strncpy exit path.  */
1555 sub $16, %ebx
1556 jbe L(StrncpyExit11Case2OrCase3)
1557# endif
1558 test %eax, %eax
1559 jnz L(Shl11LoopExit)
1560
/* No zero byte: emit 16 bytes, remember the unshifted vector in %xmm1
   and fetch the next aligned vector.  */
1561 palignr $11, %xmm1, %xmm2
1562 movaps %xmm3, %xmm1
1563 movaps %xmm2, (%edx)
1564 movaps 21(%ecx), %xmm2
1565
/* Step 2 of 4 (pointer updates use lea, interleaved with the compare).  */
1566 pcmpeqb %xmm2, %xmm0
1567 lea 16(%edx), %edx
1568 pmovmskb %xmm0, %eax
1569 lea 16(%ecx), %ecx
1570 movaps %xmm2, %xmm3
1571# ifdef USE_AS_STRNCPY
1572 sub $16, %ebx
1573 jbe L(StrncpyExit11Case2OrCase3)
1574# endif
1575 test %eax, %eax
1576 jnz L(Shl11LoopExit)
1577
1578 palignr $11, %xmm1, %xmm2
1579 movaps %xmm2, (%edx)
1580 movaps 21(%ecx), %xmm2
1581 movaps %xmm3, %xmm1
1582
/* Step 3 of 4.  */
1583 pcmpeqb %xmm2, %xmm0
1584 lea 16(%edx), %edx
1585 pmovmskb %xmm0, %eax
1586 lea 16(%ecx), %ecx
1587 movaps %xmm2, %xmm3
1588# ifdef USE_AS_STRNCPY
1589 sub $16, %ebx
1590 jbe L(StrncpyExit11Case2OrCase3)
1591# endif
1592 test %eax, %eax
1593 jnz L(Shl11LoopExit)
1594
1595 palignr $11, %xmm1, %xmm2
1596 movaps %xmm2, (%edx)
1597 movaps 21(%ecx), %xmm2
1598
/* Step 4 of 4: the previous vector is still in %xmm3 (no copy to %xmm1
   was made in step 3), so the final palignr uses %xmm3.  */
1599 pcmpeqb %xmm2, %xmm0
1600 lea 16(%edx), %edx
1601 pmovmskb %xmm0, %eax
1602 lea 16(%ecx), %ecx
1603# ifdef USE_AS_STRNCPY
1604 sub $16, %ebx
1605 jbe L(StrncpyExit11Case2OrCase3)
1606# endif
1607 test %eax, %eax
1608 jnz L(Shl11LoopExit)
1609
1610 palignr $11, %xmm3, %xmm2
1611 movaps %xmm2, (%edx)
/* %ecx + 21 is the first aligned source vector not loaded yet.  */
1612 lea 21(%ecx), %ecx
1613 lea 16(%edx), %edx
1614
/* Round that address down to a 64-byte boundary for the main loop.
   %eax = number of bytes stepped back; %edx is moved back by the same
   amount (and, for strncpy, the bytes are returned to the budget), so
   the loop re-copies that overlap.  Afterwards %ecx + 5 is 64-byte
   aligned and %xmm1 is reloaded with the vector in front of it.  */
1615 mov %ecx, %eax
1616 and $-0x40, %ecx
1617 sub %ecx, %eax
1618 lea -5(%ecx), %ecx
1619 sub %eax, %edx
1620# ifdef USE_AS_STRNCPY
1621 add %eax, %ebx
1622# endif
1623 movaps -11(%ecx), %xmm1
1624
/* Shl11LoopStart: main loop of the shift-by-11 path, 64 source bytes per
   iteration.  Loads are movaps at 5/21/37/53(%ecx), so %ecx + 5 must be
   16-byte aligned (the setup code just above rounds it down to a 64-byte
   boundary).  %xmm1 holds the aligned vector that precedes 5(%ecx).
   NOTE(review): %xmm0 is assumed to be all-zero here -- confirm.  */
1625L(Shl11LoopStart):
1626 movaps 5(%ecx), %xmm2
1627 movaps 21(%ecx), %xmm3
1628 movaps %xmm3, %xmm6
1629 movaps 37(%ecx), %xmm4
1630 movaps %xmm4, %xmm7
1631 movaps 53(%ecx), %xmm5
/* Unsigned byte-min of the four vectors: a zero byte in any of them
   leaves a zero byte in %xmm7, so one compare covers all 64 bytes.  */
1632 pminub %xmm2, %xmm6
1633 pminub %xmm5, %xmm7
1634 pminub %xmm6, %xmm7
1635 pcmpeqb %xmm0, %xmm7
1636 pmovmskb %xmm7, %eax
/* Keep the unshifted last vector; it becomes %xmm1 of the next round.  */
1637 movaps %xmm5, %xmm7
/* palignr does not modify EFLAGS, so it may sit between test and jnz.  */
1638 palignr $11, %xmm4, %xmm5
1639 test %eax, %eax
1640 palignr $11, %xmm3, %xmm4
/* A zero byte is somewhere in this group: redo it 16 bytes at a time
   from Shl11Start, which starts from %xmm1/%xmm2 (both still intact).  */
1641 jnz L(Shl11Start)
1642# ifdef USE_AS_STRNCPY
/* Leave the loop when 64 or fewer bytes of the length budget remain.  */
1643 sub $64, %ebx
1644 jbe L(StrncpyLeave11)
1645# endif
1646 palignr $11, %xmm2, %xmm3
1647 lea 64(%ecx), %ecx
1648 palignr $11, %xmm1, %xmm2
1649 movaps %xmm7, %xmm1
/* Store the four realigned vectors and advance the destination.  */
1650 movaps %xmm5, 48(%edx)
1651 movaps %xmm4, 32(%edx)
1652 movaps %xmm3, 16(%edx)
1653 movaps %xmm2, (%edx)
1654 lea 64(%edx), %edx
1655 jmp L(Shl11LoopStart)
1656
/* Shl11LoopExit: a zero byte was found in the aligned vector at 5(%ecx)
   (mask in %eax).  Copy the 8 bytes at -3(%ecx)..4(%ecx), which covers
   the 5 source bytes in front of that vector, then hand over to
   CopyFrom1To16Bytes, which advances %ecx/%edx by %esi = 5 and finishes
   from the mask.  */
1657L(Shl11LoopExit):
1658 movlpd -3(%ecx), %xmm0
1659 movlpd %xmm0, -3(%edx)
1660 mov $5, %esi
1661 jmp L(CopyFrom1To16Bytes)
1662
/* Shl12: copy path for a source pointer that is 12 bytes past a 16-byte
   boundary.  Every load is a movaps at -12(%ecx) + 16*k and every store
   a movaps to %edx, so both of those addresses must be 16-byte aligned.
   palignr $12 joins two consecutive aligned source vectors into the 16
   bytes that start at the current %ecx.
   Registers: %ecx = source, %edx = destination, %eax = mask of zero
   bytes in the newest aligned vector, %ebx = remaining length budget
   (strncpy builds only), %xmm1 = previous aligned vector, %xmm2 = newest.
   NOTE(review): %xmm0 is assumed to be all-zero on entry; pcmpeqb writes
   its result into %xmm0, which stays zero as long as no zero byte is
   found.  Confirm against the code that dispatches here.  */
1663 .p2align 4
1664L(Shl12):
1665 movaps -12(%ecx), %xmm1
1666 movaps 4(%ecx), %xmm2
/* Shl12Start is also the re-entry point used by Shl12LoopStart when the
   64-byte loop detects a zero byte.  Step 1 of 4.  */
1667L(Shl12Start):
1668 pcmpeqb %xmm2, %xmm0
1669 pmovmskb %xmm0, %eax
1670 movaps %xmm2, %xmm3
1671# ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish on the strncpy exit path.  */
1672 sub $16, %ebx
1673 jbe L(StrncpyExit12Case2OrCase3)
1674# endif
1675 test %eax, %eax
1676 jnz L(Shl12LoopExit)
1677
/* No zero byte: emit 16 bytes, remember the unshifted vector in %xmm1
   and fetch the next aligned vector.  */
1678 palignr $12, %xmm1, %xmm2
1679 movaps %xmm3, %xmm1
1680 movaps %xmm2, (%edx)
1681 movaps 20(%ecx), %xmm2
1682
/* Step 2 of 4 (pointer updates use lea, interleaved with the compare).  */
1683 pcmpeqb %xmm2, %xmm0
1684 lea 16(%edx), %edx
1685 pmovmskb %xmm0, %eax
1686 lea 16(%ecx), %ecx
1687 movaps %xmm2, %xmm3
1688# ifdef USE_AS_STRNCPY
1689 sub $16, %ebx
1690 jbe L(StrncpyExit12Case2OrCase3)
1691# endif
1692 test %eax, %eax
1693 jnz L(Shl12LoopExit)
1694
1695 palignr $12, %xmm1, %xmm2
1696 movaps %xmm2, (%edx)
1697 movaps 20(%ecx), %xmm2
1698 movaps %xmm3, %xmm1
1699
/* Step 3 of 4.  */
1700 pcmpeqb %xmm2, %xmm0
1701 lea 16(%edx), %edx
1702 pmovmskb %xmm0, %eax
1703 lea 16(%ecx), %ecx
1704 movaps %xmm2, %xmm3
1705# ifdef USE_AS_STRNCPY
1706 sub $16, %ebx
1707 jbe L(StrncpyExit12Case2OrCase3)
1708# endif
1709 test %eax, %eax
1710 jnz L(Shl12LoopExit)
1711
1712 palignr $12, %xmm1, %xmm2
1713 movaps %xmm2, (%edx)
1714 movaps 20(%ecx), %xmm2
1715
/* Step 4 of 4: the previous vector is still in %xmm3 (no copy to %xmm1
   was made in step 3), so the final palignr uses %xmm3.  */
1716 pcmpeqb %xmm2, %xmm0
1717 lea 16(%edx), %edx
1718 pmovmskb %xmm0, %eax
1719 lea 16(%ecx), %ecx
1720# ifdef USE_AS_STRNCPY
1721 sub $16, %ebx
1722 jbe L(StrncpyExit12Case2OrCase3)
1723# endif
1724 test %eax, %eax
1725 jnz L(Shl12LoopExit)
1726
1727 palignr $12, %xmm3, %xmm2
1728 movaps %xmm2, (%edx)
/* %ecx + 20 is the first aligned source vector not loaded yet.  */
1729 lea 20(%ecx), %ecx
1730 lea 16(%edx), %edx
1731
/* Round that address down to a 64-byte boundary for the main loop.
   %eax = number of bytes stepped back; %edx is moved back by the same
   amount (and, for strncpy, the bytes are returned to the budget), so
   the loop re-copies that overlap.  Afterwards %ecx + 4 is 64-byte
   aligned and %xmm1 is reloaded with the vector in front of it.  */
1732 mov %ecx, %eax
1733 and $-0x40, %ecx
1734 sub %ecx, %eax
1735 lea -4(%ecx), %ecx
1736 sub %eax, %edx
1737# ifdef USE_AS_STRNCPY
1738 add %eax, %ebx
1739# endif
1740 movaps -12(%ecx), %xmm1
1741
/* Shl12LoopStart: main loop of the shift-by-12 path, 64 source bytes per
   iteration.  Loads are movaps at 4/20/36/52(%ecx), so %ecx + 4 must be
   16-byte aligned (the setup code just above rounds it down to a 64-byte
   boundary).  %xmm1 holds the aligned vector that precedes 4(%ecx).
   NOTE(review): %xmm0 is assumed to be all-zero here -- confirm.  */
1742L(Shl12LoopStart):
1743 movaps 4(%ecx), %xmm2
1744 movaps 20(%ecx), %xmm3
1745 movaps %xmm3, %xmm6
1746 movaps 36(%ecx), %xmm4
1747 movaps %xmm4, %xmm7
1748 movaps 52(%ecx), %xmm5
/* Unsigned byte-min of the four vectors: a zero byte in any of them
   leaves a zero byte in %xmm7, so one compare covers all 64 bytes.  */
1749 pminub %xmm2, %xmm6
1750 pminub %xmm5, %xmm7
1751 pminub %xmm6, %xmm7
1752 pcmpeqb %xmm0, %xmm7
1753 pmovmskb %xmm7, %eax
/* Keep the unshifted last vector; it becomes %xmm1 of the next round.  */
1754 movaps %xmm5, %xmm7
/* palignr does not modify EFLAGS, so it may sit between test and jnz.  */
1755 palignr $12, %xmm4, %xmm5
1756 test %eax, %eax
1757 palignr $12, %xmm3, %xmm4
/* A zero byte is somewhere in this group: redo it 16 bytes at a time
   from Shl12Start, which starts from %xmm1/%xmm2 (both still intact).  */
1758 jnz L(Shl12Start)
1759# ifdef USE_AS_STRNCPY
/* Leave the loop when 64 or fewer bytes of the length budget remain.  */
1760 sub $64, %ebx
1761 jbe L(StrncpyLeave12)
1762# endif
1763 palignr $12, %xmm2, %xmm3
1764 lea 64(%ecx), %ecx
1765 palignr $12, %xmm1, %xmm2
1766 movaps %xmm7, %xmm1
/* Store the four realigned vectors and advance the destination.  */
1767 movaps %xmm5, 48(%edx)
1768 movaps %xmm4, 32(%edx)
1769 movaps %xmm3, 16(%edx)
1770 movaps %xmm2, (%edx)
1771 lea 64(%edx), %edx
1772 jmp L(Shl12LoopStart)
1773
/* Shl12LoopExit: a zero byte was found in the aligned vector at 4(%ecx)
   (mask in %eax).  Copy the 4 source bytes in front of that vector with
   one 32-bit move (%esi is used as scratch first), then hand over to
   CopyFrom1To16Bytes, which advances %ecx/%edx by %esi = 4 and finishes
   from the mask.  */
1774L(Shl12LoopExit):
1775 movl (%ecx), %esi
1776 movl %esi, (%edx)
1777 mov $4, %esi
1778 jmp L(CopyFrom1To16Bytes)
1779
/* Shl13: copy path for a source pointer that is 13 bytes past a 16-byte
   boundary.  Every load is a movaps at -13(%ecx) + 16*k and every store
   a movaps to %edx, so both of those addresses must be 16-byte aligned.
   palignr $13 joins two consecutive aligned source vectors into the 16
   bytes that start at the current %ecx.
   Registers: %ecx = source, %edx = destination, %eax = mask of zero
   bytes in the newest aligned vector, %ebx = remaining length budget
   (strncpy builds only), %xmm1 = previous aligned vector, %xmm2 = newest.
   NOTE(review): %xmm0 is assumed to be all-zero on entry; pcmpeqb writes
   its result into %xmm0, which stays zero as long as no zero byte is
   found.  Confirm against the code that dispatches here.  */
1780 .p2align 4
1781L(Shl13):
1782 movaps -13(%ecx), %xmm1
1783 movaps 3(%ecx), %xmm2
/* Shl13Start is also the re-entry point used by Shl13LoopStart when the
   64-byte loop detects a zero byte.  Step 1 of 4.  */
1784L(Shl13Start):
1785 pcmpeqb %xmm2, %xmm0
1786 pmovmskb %xmm0, %eax
1787 movaps %xmm2, %xmm3
1788# ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish on the strncpy exit path.  */
1789 sub $16, %ebx
1790 jbe L(StrncpyExit13Case2OrCase3)
1791# endif
1792 test %eax, %eax
1793 jnz L(Shl13LoopExit)
1794
/* No zero byte: emit 16 bytes, remember the unshifted vector in %xmm1
   and fetch the next aligned vector.  */
1795 palignr $13, %xmm1, %xmm2
1796 movaps %xmm3, %xmm1
1797 movaps %xmm2, (%edx)
1798 movaps 19(%ecx), %xmm2
1799
/* Step 2 of 4 (pointer updates use lea, interleaved with the compare).  */
1800 pcmpeqb %xmm2, %xmm0
1801 lea 16(%edx), %edx
1802 pmovmskb %xmm0, %eax
1803 lea 16(%ecx), %ecx
1804 movaps %xmm2, %xmm3
1805# ifdef USE_AS_STRNCPY
1806 sub $16, %ebx
1807 jbe L(StrncpyExit13Case2OrCase3)
1808# endif
1809 test %eax, %eax
1810 jnz L(Shl13LoopExit)
1811
1812 palignr $13, %xmm1, %xmm2
1813 movaps %xmm2, (%edx)
1814 movaps 19(%ecx), %xmm2
1815 movaps %xmm3, %xmm1
1816
/* Step 3 of 4.  */
1817 pcmpeqb %xmm2, %xmm0
1818 lea 16(%edx), %edx
1819 pmovmskb %xmm0, %eax
1820 lea 16(%ecx), %ecx
1821 movaps %xmm2, %xmm3
1822# ifdef USE_AS_STRNCPY
1823 sub $16, %ebx
1824 jbe L(StrncpyExit13Case2OrCase3)
1825# endif
1826 test %eax, %eax
1827 jnz L(Shl13LoopExit)
1828
1829 palignr $13, %xmm1, %xmm2
1830 movaps %xmm2, (%edx)
1831 movaps 19(%ecx), %xmm2
1832
/* Step 4 of 4: the previous vector is still in %xmm3 (no copy to %xmm1
   was made in step 3), so the final palignr uses %xmm3.  */
1833 pcmpeqb %xmm2, %xmm0
1834 lea 16(%edx), %edx
1835 pmovmskb %xmm0, %eax
1836 lea 16(%ecx), %ecx
1837# ifdef USE_AS_STRNCPY
1838 sub $16, %ebx
1839 jbe L(StrncpyExit13Case2OrCase3)
1840# endif
1841 test %eax, %eax
1842 jnz L(Shl13LoopExit)
1843
1844 palignr $13, %xmm3, %xmm2
1845 movaps %xmm2, (%edx)
/* %ecx + 19 is the first aligned source vector not loaded yet.  */
1846 lea 19(%ecx), %ecx
1847 lea 16(%edx), %edx
1848
/* Round that address down to a 64-byte boundary for the main loop.
   %eax = number of bytes stepped back; %edx is moved back by the same
   amount (and, for strncpy, the bytes are returned to the budget), so
   the loop re-copies that overlap.  Afterwards %ecx + 3 is 64-byte
   aligned and %xmm1 is reloaded with the vector in front of it.  */
1849 mov %ecx, %eax
1850 and $-0x40, %ecx
1851 sub %ecx, %eax
1852 lea -3(%ecx), %ecx
1853 sub %eax, %edx
1854# ifdef USE_AS_STRNCPY
1855 add %eax, %ebx
1856# endif
1857 movaps -13(%ecx), %xmm1
1858
/* Shl13LoopStart: main loop of the shift-by-13 path, 64 source bytes per
   iteration.  Loads are movaps at 3/19/35/51(%ecx), so %ecx + 3 must be
   16-byte aligned (the setup code just above rounds it down to a 64-byte
   boundary).  %xmm1 holds the aligned vector that precedes 3(%ecx).
   NOTE(review): %xmm0 is assumed to be all-zero here -- confirm.  */
1859L(Shl13LoopStart):
1860 movaps 3(%ecx), %xmm2
1861 movaps 19(%ecx), %xmm3
1862 movaps %xmm3, %xmm6
1863 movaps 35(%ecx), %xmm4
1864 movaps %xmm4, %xmm7
1865 movaps 51(%ecx), %xmm5
/* Unsigned byte-min of the four vectors: a zero byte in any of them
   leaves a zero byte in %xmm7, so one compare covers all 64 bytes.  */
1866 pminub %xmm2, %xmm6
1867 pminub %xmm5, %xmm7
1868 pminub %xmm6, %xmm7
1869 pcmpeqb %xmm0, %xmm7
1870 pmovmskb %xmm7, %eax
/* Keep the unshifted last vector; it becomes %xmm1 of the next round.  */
1871 movaps %xmm5, %xmm7
/* palignr does not modify EFLAGS, so it may sit between test and jnz.  */
1872 palignr $13, %xmm4, %xmm5
1873 test %eax, %eax
1874 palignr $13, %xmm3, %xmm4
/* A zero byte is somewhere in this group: redo it 16 bytes at a time
   from Shl13Start, which starts from %xmm1/%xmm2 (both still intact).  */
1875 jnz L(Shl13Start)
1876# ifdef USE_AS_STRNCPY
/* Leave the loop when 64 or fewer bytes of the length budget remain.  */
1877 sub $64, %ebx
1878 jbe L(StrncpyLeave13)
1879# endif
1880 palignr $13, %xmm2, %xmm3
1881 lea 64(%ecx), %ecx
1882 palignr $13, %xmm1, %xmm2
1883 movaps %xmm7, %xmm1
/* Store the four realigned vectors and advance the destination.  */
1884 movaps %xmm5, 48(%edx)
1885 movaps %xmm4, 32(%edx)
1886 movaps %xmm3, 16(%edx)
1887 movaps %xmm2, (%edx)
1888 lea 64(%edx), %edx
1889 jmp L(Shl13LoopStart)
1890
/* Shl13LoopExit: a zero byte was found in the aligned vector at 3(%ecx)
   (mask in %eax).  Copy the 4 bytes at -1(%ecx)..2(%ecx), which covers
   the 3 source bytes in front of that vector (%esi is used as scratch
   first), then hand over to CopyFrom1To16Bytes, which advances
   %ecx/%edx by %esi = 3 and finishes from the mask.  */
1891L(Shl13LoopExit):
1892 movl -1(%ecx), %esi
1893 movl %esi, -1(%edx)
1894 mov $3, %esi
1895 jmp L(CopyFrom1To16Bytes)
1896
/* Shl14: copy path for a source pointer that is 14 bytes past a 16-byte
   boundary.  Every load is a movaps at -14(%ecx) + 16*k and every store
   a movaps to %edx, so both of those addresses must be 16-byte aligned.
   palignr $14 joins two consecutive aligned source vectors into the 16
   bytes that start at the current %ecx.
   Registers: %ecx = source, %edx = destination, %eax = mask of zero
   bytes in the newest aligned vector, %ebx = remaining length budget
   (strncpy builds only), %xmm1 = previous aligned vector, %xmm2 = newest.
   NOTE(review): %xmm0 is assumed to be all-zero on entry; pcmpeqb writes
   its result into %xmm0, which stays zero as long as no zero byte is
   found.  Confirm against the code that dispatches here.  */
1897 .p2align 4
1898L(Shl14):
1899 movaps -14(%ecx), %xmm1
1900 movaps 2(%ecx), %xmm2
/* Shl14Start is also the re-entry point used by Shl14LoopStart when the
   64-byte loop detects a zero byte.  Step 1 of 4.  */
1901L(Shl14Start):
1902 pcmpeqb %xmm2, %xmm0
1903 pmovmskb %xmm0, %eax
1904 movaps %xmm2, %xmm3
1905# ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish on the strncpy exit path.  */
1906 sub $16, %ebx
1907 jbe L(StrncpyExit14Case2OrCase3)
1908# endif
1909 test %eax, %eax
1910 jnz L(Shl14LoopExit)
1911
/* No zero byte: emit 16 bytes, remember the unshifted vector in %xmm1
   and fetch the next aligned vector.  */
1912 palignr $14, %xmm1, %xmm2
1913 movaps %xmm3, %xmm1
1914 movaps %xmm2, (%edx)
1915 movaps 18(%ecx), %xmm2
1916
/* Step 2 of 4 (pointer updates use lea, interleaved with the compare).  */
1917 pcmpeqb %xmm2, %xmm0
1918 lea 16(%edx), %edx
1919 pmovmskb %xmm0, %eax
1920 lea 16(%ecx), %ecx
1921 movaps %xmm2, %xmm3
1922# ifdef USE_AS_STRNCPY
1923 sub $16, %ebx
1924 jbe L(StrncpyExit14Case2OrCase3)
1925# endif
1926 test %eax, %eax
1927 jnz L(Shl14LoopExit)
1928
1929 palignr $14, %xmm1, %xmm2
1930 movaps %xmm2, (%edx)
1931 movaps 18(%ecx), %xmm2
1932 movaps %xmm3, %xmm1
1933
/* Step 3 of 4.  */
1934 pcmpeqb %xmm2, %xmm0
1935 lea 16(%edx), %edx
1936 pmovmskb %xmm0, %eax
1937 lea 16(%ecx), %ecx
1938 movaps %xmm2, %xmm3
1939# ifdef USE_AS_STRNCPY
1940 sub $16, %ebx
1941 jbe L(StrncpyExit14Case2OrCase3)
1942# endif
1943 test %eax, %eax
1944 jnz L(Shl14LoopExit)
1945
1946 palignr $14, %xmm1, %xmm2
1947 movaps %xmm2, (%edx)
1948 movaps 18(%ecx), %xmm2
1949
/* Step 4 of 4: the previous vector is still in %xmm3 (no copy to %xmm1
   was made in step 3), so the final palignr uses %xmm3.  */
1950 pcmpeqb %xmm2, %xmm0
1951 lea 16(%edx), %edx
1952 pmovmskb %xmm0, %eax
1953 lea 16(%ecx), %ecx
1954# ifdef USE_AS_STRNCPY
1955 sub $16, %ebx
1956 jbe L(StrncpyExit14Case2OrCase3)
1957# endif
1958 test %eax, %eax
1959 jnz L(Shl14LoopExit)
1960
1961 palignr $14, %xmm3, %xmm2
1962 movaps %xmm2, (%edx)
/* %ecx + 18 is the first aligned source vector not loaded yet.  */
1963 lea 18(%ecx), %ecx
1964 lea 16(%edx), %edx
1965
/* Round that address down to a 64-byte boundary for the main loop.
   %eax = number of bytes stepped back; %edx is moved back by the same
   amount (and, for strncpy, the bytes are returned to the budget), so
   the loop re-copies that overlap.  Afterwards %ecx + 2 is 64-byte
   aligned and %xmm1 is reloaded with the vector in front of it.  */
1966 mov %ecx, %eax
1967 and $-0x40, %ecx
1968 sub %ecx, %eax
1969 lea -2(%ecx), %ecx
1970 sub %eax, %edx
1971# ifdef USE_AS_STRNCPY
1972 add %eax, %ebx
1973# endif
1974 movaps -14(%ecx), %xmm1
1975
/* Shl14LoopStart: main loop of the shift-by-14 path, 64 source bytes per
   iteration.  Loads are movaps at 2/18/34/50(%ecx), so %ecx + 2 must be
   16-byte aligned (the setup code just above rounds it down to a 64-byte
   boundary).  %xmm1 holds the aligned vector that precedes 2(%ecx).
   NOTE(review): %xmm0 is assumed to be all-zero here -- confirm.  */
1976L(Shl14LoopStart):
1977 movaps 2(%ecx), %xmm2
1978 movaps 18(%ecx), %xmm3
1979 movaps %xmm3, %xmm6
1980 movaps 34(%ecx), %xmm4
1981 movaps %xmm4, %xmm7
1982 movaps 50(%ecx), %xmm5
/* Unsigned byte-min of the four vectors: a zero byte in any of them
   leaves a zero byte in %xmm7, so one compare covers all 64 bytes.  */
1983 pminub %xmm2, %xmm6
1984 pminub %xmm5, %xmm7
1985 pminub %xmm6, %xmm7
1986 pcmpeqb %xmm0, %xmm7
1987 pmovmskb %xmm7, %eax
/* Keep the unshifted last vector; it becomes %xmm1 of the next round.  */
1988 movaps %xmm5, %xmm7
/* palignr does not modify EFLAGS, so it may sit between test and jnz.  */
1989 palignr $14, %xmm4, %xmm5
1990 test %eax, %eax
1991 palignr $14, %xmm3, %xmm4
/* A zero byte is somewhere in this group: redo it 16 bytes at a time
   from Shl14Start, which starts from %xmm1/%xmm2 (both still intact).  */
1992 jnz L(Shl14Start)
1993# ifdef USE_AS_STRNCPY
/* Leave the loop when 64 or fewer bytes of the length budget remain.  */
1994 sub $64, %ebx
1995 jbe L(StrncpyLeave14)
1996# endif
1997 palignr $14, %xmm2, %xmm3
1998 lea 64(%ecx), %ecx
1999 palignr $14, %xmm1, %xmm2
2000 movaps %xmm7, %xmm1
/* Store the four realigned vectors and advance the destination.  */
2001 movaps %xmm5, 48(%edx)
2002 movaps %xmm4, 32(%edx)
2003 movaps %xmm3, 16(%edx)
2004 movaps %xmm2, (%edx)
2005 lea 64(%edx), %edx
2006 jmp L(Shl14LoopStart)
2007
/* Shl14LoopExit: a zero byte was found in the aligned vector at 2(%ecx)
   (mask in %eax).  Copy the 4 bytes at -2(%ecx)..1(%ecx), which covers
   the 2 source bytes in front of that vector (%esi is used as scratch
   first), then hand over to CopyFrom1To16Bytes, which advances
   %ecx/%edx by %esi = 2 and finishes from the mask.  */
2008L(Shl14LoopExit):
2009 movl -2(%ecx), %esi
2010 movl %esi, -2(%edx)
2011 mov $2, %esi
2012 jmp L(CopyFrom1To16Bytes)
2013
/* Shl15: copy path for a source pointer that is 15 bytes past a 16-byte
   boundary.  Every load is a movaps at -15(%ecx) + 16*k and every store
   a movaps to %edx, so both of those addresses must be 16-byte aligned.
   palignr $15 joins two consecutive aligned source vectors into the 16
   bytes that start at the current %ecx.
   Registers: %ecx = source, %edx = destination, %eax = mask of zero
   bytes in the newest aligned vector, %ebx = remaining length budget
   (strncpy builds only), %xmm1 = previous aligned vector, %xmm2 = newest.
   NOTE(review): %xmm0 is assumed to be all-zero on entry; pcmpeqb writes
   its result into %xmm0, which stays zero as long as no zero byte is
   found.  Confirm against the code that dispatches here.  */
2014 .p2align 4
2015L(Shl15):
2016 movaps -15(%ecx), %xmm1
2017 movaps 1(%ecx), %xmm2
/* Shl15Start is also the re-entry point used by Shl15LoopStart when the
   64-byte loop detects a zero byte.  Step 1 of 4.  */
2018L(Shl15Start):
2019 pcmpeqb %xmm2, %xmm0
2020 pmovmskb %xmm0, %eax
2021 movaps %xmm2, %xmm3
2022# ifdef USE_AS_STRNCPY
/* 16 or fewer bytes of budget left: finish on the strncpy exit path.  */
2023 sub $16, %ebx
2024 jbe L(StrncpyExit15Case2OrCase3)
2025# endif
2026 test %eax, %eax
2027 jnz L(Shl15LoopExit)
2028
/* No zero byte: emit 16 bytes, remember the unshifted vector in %xmm1
   and fetch the next aligned vector.  */
2029 palignr $15, %xmm1, %xmm2
2030 movaps %xmm3, %xmm1
2031 movaps %xmm2, (%edx)
2032 movaps 17(%ecx), %xmm2
2033
/* Step 2 of 4 (pointer updates use lea, interleaved with the compare).  */
2034 pcmpeqb %xmm2, %xmm0
2035 lea 16(%edx), %edx
2036 pmovmskb %xmm0, %eax
2037 lea 16(%ecx), %ecx
2038 movaps %xmm2, %xmm3
2039# ifdef USE_AS_STRNCPY
2040 sub $16, %ebx
2041 jbe L(StrncpyExit15Case2OrCase3)
2042# endif
2043 test %eax, %eax
2044 jnz L(Shl15LoopExit)
2045
2046 palignr $15, %xmm1, %xmm2
2047 movaps %xmm2, (%edx)
2048 movaps 17(%ecx), %xmm2
2049 movaps %xmm3, %xmm1
2050
/* Step 3 of 4.  */
2051 pcmpeqb %xmm2, %xmm0
2052 lea 16(%edx), %edx
2053 pmovmskb %xmm0, %eax
2054 lea 16(%ecx), %ecx
2055 movaps %xmm2, %xmm3
2056# ifdef USE_AS_STRNCPY
2057 sub $16, %ebx
2058 jbe L(StrncpyExit15Case2OrCase3)
2059# endif
2060 test %eax, %eax
2061 jnz L(Shl15LoopExit)
2062
2063 palignr $15, %xmm1, %xmm2
2064 movaps %xmm2, (%edx)
2065 movaps 17(%ecx), %xmm2
2066
/* Step 4 of 4: the previous vector is still in %xmm3 (no copy to %xmm1
   was made in step 3), so the final palignr uses %xmm3.  */
2067 pcmpeqb %xmm2, %xmm0
2068 lea 16(%edx), %edx
2069 pmovmskb %xmm0, %eax
2070 lea 16(%ecx), %ecx
2071# ifdef USE_AS_STRNCPY
2072 sub $16, %ebx
2073 jbe L(StrncpyExit15Case2OrCase3)
2074# endif
2075 test %eax, %eax
2076 jnz L(Shl15LoopExit)
2077
2078 palignr $15, %xmm3, %xmm2
2079 movaps %xmm2, (%edx)
/* %ecx + 17 is the first aligned source vector not loaded yet.  */
2080 lea 17(%ecx), %ecx
2081 lea 16(%edx), %edx
2082
/* Round that address down to a 64-byte boundary for the main loop.
   %eax = number of bytes stepped back; %edx is moved back by the same
   amount (and, for strncpy, the bytes are returned to the budget), so
   the loop re-copies that overlap.  Afterwards %ecx + 1 is 64-byte
   aligned and %xmm1 is reloaded with the vector in front of it.  */
2083 mov %ecx, %eax
2084 and $-0x40, %ecx
2085 sub %ecx, %eax
2086 lea -1(%ecx), %ecx
2087 sub %eax, %edx
2088# ifdef USE_AS_STRNCPY
2089 add %eax, %ebx
2090# endif
2091 movaps -15(%ecx), %xmm1
2092
/* Shl15LoopStart: main loop of the shift-by-15 path, 64 source bytes per
   iteration.  Loads are movaps at 1/17/33/49(%ecx), so %ecx + 1 must be
   16-byte aligned (the setup code just above rounds it down to a 64-byte
   boundary).  %xmm1 holds the aligned vector that precedes 1(%ecx).
   NOTE(review): %xmm0 is assumed to be all-zero here -- confirm.  */
2093L(Shl15LoopStart):
2094 movaps 1(%ecx), %xmm2
2095 movaps 17(%ecx), %xmm3
2096 movaps %xmm3, %xmm6
2097 movaps 33(%ecx), %xmm4
2098 movaps %xmm4, %xmm7
2099 movaps 49(%ecx), %xmm5
/* Unsigned byte-min of the four vectors: a zero byte in any of them
   leaves a zero byte in %xmm7, so one compare covers all 64 bytes.  */
2100 pminub %xmm2, %xmm6
2101 pminub %xmm5, %xmm7
2102 pminub %xmm6, %xmm7
2103 pcmpeqb %xmm0, %xmm7
2104 pmovmskb %xmm7, %eax
/* Keep the unshifted last vector; it becomes %xmm1 of the next round.  */
2105 movaps %xmm5, %xmm7
/* palignr does not modify EFLAGS, so it may sit between test and jnz.  */
2106 palignr $15, %xmm4, %xmm5
2107 test %eax, %eax
2108 palignr $15, %xmm3, %xmm4
/* A zero byte is somewhere in this group: redo it 16 bytes at a time
   from Shl15Start, which starts from %xmm1/%xmm2 (both still intact).  */
2109 jnz L(Shl15Start)
2110# ifdef USE_AS_STRNCPY
/* Leave the loop when 64 or fewer bytes of the length budget remain.  */
2111 sub $64, %ebx
2112 jbe L(StrncpyLeave15)
2113# endif
2114 palignr $15, %xmm2, %xmm3
2115 lea 64(%ecx), %ecx
2116 palignr $15, %xmm1, %xmm2
2117 movaps %xmm7, %xmm1
/* Store the four realigned vectors and advance the destination.  */
2118 movaps %xmm5, 48(%edx)
2119 movaps %xmm4, 32(%edx)
2120 movaps %xmm3, 16(%edx)
2121 movaps %xmm2, (%edx)
2122 lea 64(%edx), %edx
2123 jmp L(Shl15LoopStart)
2124
/* Shl15LoopExit: a zero byte was found in the aligned vector at 1(%ecx)
   (mask in %eax).  Copy the 4 bytes at -3(%ecx)..0(%ecx), which covers
   the single source byte in front of that vector (%esi is used as
   scratch first), and set %esi = 1 as the pointer advance for
   CopyFrom1To16Bytes.  */
2125L(Shl15LoopExit):
2126 movl -3(%ecx), %esi
2127 movl %esi, -3(%edx)
2128 mov $1, %esi
/* Only the strcat build needs an explicit jump; in the other builds
   L(CopyFrom1To16Bytes) is defined right below and is reached by
   falling through.  */
2129# ifdef USE_AS_STRCAT
2130 jmp L(CopyFrom1To16Bytes)
2131# endif
2132
2133
2134# ifndef USE_AS_STRCAT
2135
/* CopyFrom1To16Bytes: final copy when a zero byte was seen in a 16-byte
   vector.
   In:  %eax = pmovmskb mask, bit k set when byte k of the vector is zero
        %esi = distance from %ecx/%edx to the start of that vector
        %ecx/%edx = source/destination, %ebx = length budget (strncpy)
        %esi is on the stack and is popped here.
   The lowest set bit selects ExitK, which copies K = bit index + 1
   bytes (the terminator included).  */
2136 .p2align 4
2137L(CopyFrom1To16Bytes):
2138# ifdef USE_AS_STRNCPY
/* Presumably undoes the `sub $16, %ebx' done before branching here.  */
2139 add $16, %ebx
2140# endif
2141 add %esi, %edx
2142 add %esi, %ecx
2143
2144 POP (%esi)
/* Low mask byte empty: the zero byte is among bytes 8..15.  */
2145 test %al, %al
2146 jz L(ExitHigh8)
2147
/* Zero byte among bytes 0..7.  %ah is used as scratch for the low
   nibble test; a clear nibble means bytes 4..7.  */
2148L(CopyFrom1To16BytesLess8):
2149 mov %al, %ah
2150 and $15, %ah
2151 jz L(ExitHigh4)
2152
2153 test $0x01, %al
2154 jnz L(Exit1)
2155 test $0x02, %al
2156 jnz L(Exit2)
2157 test $0x04, %al
2158 jnz L(Exit3)
/* Otherwise bit 3 is the lowest set bit: fall through into Exit4.  */
2159
/* Exit4: copy the final 4 bytes from (%ecx) to (%edx) and return.
   SAVE_RESULT sets %eax to %edx + 3 in stpcpy builds and to %edi
   otherwise.  Also reached by fall-through from the mask dispatch
   above.  */
2160 .p2align 4
2161L(Exit4):
2162 movl (%ecx), %eax
2163 movl %eax, (%edx)
2164 SAVE_RESULT (3)
2165# ifdef USE_AS_STRNCPY
/* Charge 4 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2166 sub $4, %ebx
2167 lea 4(%edx), %ecx
2168 jnz L(StrncpyFillTailWithZero1)
2169# ifdef USE_AS_STPCPY
/* Budget exactly used up: cmpb sets CF only when the last copied byte is
   zero, and sbb $-1 adds 1 - CF, so %eax moves one past that byte only
   when it is not a terminator.  */
2170 cmpb $1, (%eax)
2171 sbb $-1, %eax
2172# endif
2173# endif
2174 RETURN1
2175
/* ExitHigh4: the lowest set mask bit is in bits 4..7 of %al, i.e. the
   zero byte is byte 4..7.  Bits 4/5/6 select Exit5/6/7; otherwise
   control falls through into Exit8.  */
2176 .p2align 4
2177L(ExitHigh4):
2178 test $0x10, %al
2179 jnz L(Exit5)
2180 test $0x20, %al
2181 jnz L(Exit6)
2182 test $0x40, %al
2183 jnz L(Exit7)
2184
/* Exit8: copy the final 8 bytes from (%ecx) to (%edx) with one 64-bit
   SSE move and return.  SAVE_RESULT sets %eax to %edx + 7 in stpcpy
   builds and to %edi otherwise.  Also reached by fall-through from
   ExitHigh4.  */
2185 .p2align 4
2186L(Exit8):
2187 movlpd (%ecx), %xmm0
2188 movlpd %xmm0, (%edx)
2189 SAVE_RESULT (7)
2190# ifdef USE_AS_STRNCPY
/* Charge 8 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2191 sub $8, %ebx
2192 lea 8(%edx), %ecx
2193 jnz L(StrncpyFillTailWithZero1)
2194# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2195 cmpb $1, (%eax)
2196 sbb $-1, %eax
2197# endif
2198# endif
2199 RETURN1
2200
/* ExitHigh8: the low mask byte is empty, so the zero byte is byte 8..15
   and its bit is in %ah.  %al is reused as scratch for the low nibble
   of %ah; a clear nibble means bytes 12..15.  Bits 0/1/2 of %ah select
   Exit9/10/11; otherwise control falls through into Exit12.  */
2201 .p2align 4
2202L(ExitHigh8):
2203 mov %ah, %al
2204 and $15, %al
2205 jz L(ExitHigh12)
2206
2207 test $0x01, %ah
2208 jnz L(Exit9)
2209 test $0x02, %ah
2210 jnz L(Exit10)
2211 test $0x04, %ah
2212 jnz L(Exit11)
2213
/* Exit12: copy the final 12 bytes (8 + 4) from (%ecx) to (%edx) and
   return.  SAVE_RESULT sets %eax to %edx + 11 in stpcpy builds and to
   %edi otherwise.  Also reached by fall-through from ExitHigh8.  */
2214 .p2align 4
2215L(Exit12):
2216 movlpd (%ecx), %xmm0
2217 movl 8(%ecx), %eax
2218 movlpd %xmm0, (%edx)
2219 movl %eax, 8(%edx)
2220 SAVE_RESULT (11)
2221# ifdef USE_AS_STRNCPY
/* Charge 12 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2222 sub $12, %ebx
2223 lea 12(%edx), %ecx
2224 jnz L(StrncpyFillTailWithZero1)
2225# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2226 cmpb $1, (%eax)
2227 sbb $-1, %eax
2228# endif
2229# endif
2230 RETURN1
2231
/* ExitHigh12: the zero byte is byte 12..15.  Bits 4/5/6 of %ah select
   Exit13/14/15; otherwise control falls through into Exit16.  */
2232 .p2align 4
2233L(ExitHigh12):
2234 test $0x10, %ah
2235 jnz L(Exit13)
2236 test $0x20, %ah
2237 jnz L(Exit14)
2238 test $0x40, %ah
2239 jnz L(Exit15)
2240
/* Exit16: copy the final 16 bytes from (%ecx) to (%edx) with unaligned
   moves and return.  SAVE_RESULT sets %eax to %edx + 15 in stpcpy
   builds and to %edi otherwise.  Also reached by fall-through from
   ExitHigh12.  */
2241 .p2align 4
2242L(Exit16):
2243 movdqu (%ecx), %xmm0
2244 movdqu %xmm0, (%edx)
2245 SAVE_RESULT (15)
2246# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2247 sub $16, %ebx
2248 lea 16(%edx), %ecx
2249 jnz L(StrncpyFillTailWithZero1)
2250# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2251 cmpb $1, (%eax)
2252 sbb $-1, %eax
2253# endif
2254# endif
2255 RETURN1
2256
2257# ifdef USE_AS_STRNCPY
2258
/* Unwind info only: the code below is entered with %esi still pushed.  */
2259 CFI_PUSH(%esi)
2260
/* CopyFrom1To16BytesCase2 (strncpy builds): the current 16-byte vector
   contains a zero byte (%eax != 0) and the length budget also ends
   within it.  After the adjustments below %ebx is the number of bytes
   still allowed, and min(position of zero byte + 1, %ebx) bytes are
   copied by the selected ExitK.  */
2261 .p2align 4
2262L(CopyFrom1To16BytesCase2):
/* Presumably undoes the `sub $16, %ebx' done before branching here.  */
2263 add $16, %ebx
2264 add %esi, %ecx
2265 add %esi, %edx
2266
2267 POP (%esi)
2268
2269 test %al, %al
2270 jz L(ExitHighCase2)
2271
/* Zero byte among bytes 0..7.  With more than 8 bytes allowed the
   budget cannot cut in first, so reuse the plain mask dispatch.  */
2272 cmp $8, %ebx
2273 ja L(CopyFrom1To16BytesLess8)
2274
/* Otherwise walk K = 1..7: stop at the first K where either byte K-1 is
   zero or exactly K bytes are allowed.  */
2275 test $0x01, %al
2276 jnz L(Exit1)
2277 cmp $1, %ebx
2278 je L(Exit1)
2279 test $0x02, %al
2280 jnz L(Exit2)
2281 cmp $2, %ebx
2282 je L(Exit2)
2283 test $0x04, %al
2284 jnz L(Exit3)
2285 cmp $3, %ebx
2286 je L(Exit3)
2287 test $0x08, %al
2288 jnz L(Exit4)
2289 cmp $4, %ebx
2290 je L(Exit4)
2291 test $0x10, %al
2292 jnz L(Exit5)
2293 cmp $5, %ebx
2294 je L(Exit5)
2295 test $0x20, %al
2296 jnz L(Exit6)
2297 cmp $6, %ebx
2298 je L(Exit6)
2299 test $0x40, %al
2300 jnz L(Exit7)
2301 cmp $7, %ebx
2302 je L(Exit7)
2303 jmp L(Exit8)
2304
/* ExitHighCase2 (strncpy builds): continuation of Case2 when the zero
   byte is among bytes 8..15 (its bit is in %ah).  %ebx = bytes still
   allowed.  */
2305 .p2align 4
2306L(ExitHighCase2):
/* 8 or fewer bytes allowed: the budget ends before the zero byte, so
   only the length matters from here on.  */
2307 cmp $8, %ebx
2308 jbe L(CopyFrom1To16BytesLess8Case3)
2309
/* Walk K = 9..15: stop at the first K where either byte K-1 is zero or
   exactly K bytes are allowed; otherwise copy all 16.  */
2310 test $0x01, %ah
2311 jnz L(Exit9)
2312 cmp $9, %ebx
2313 je L(Exit9)
2314 test $0x02, %ah
2315 jnz L(Exit10)
2316 cmp $10, %ebx
2317 je L(Exit10)
2318 test $0x04, %ah
2319 jnz L(Exit11)
2320 cmp $11, %ebx
2321 je L(Exit11)
2322 test $0x8, %ah
2323 jnz L(Exit12)
2324 cmp $12, %ebx
2325 je L(Exit12)
2326 test $0x10, %ah
2327 jnz L(Exit13)
2328 cmp $13, %ebx
2329 je L(Exit13)
2330 test $0x20, %ah
2331 jnz L(Exit14)
2332 cmp $14, %ebx
2333 je L(Exit14)
2334 test $0x40, %ah
2335 jnz L(Exit15)
2336 cmp $15, %ebx
2337 je L(Exit15)
2338 jmp L(Exit16)
2339
/* Unwind info only: the code below is entered with %esi still pushed.  */
2340 CFI_PUSH(%esi)
2341
/* CopyFrom1To16BytesCase2OrCase3 (strncpy builds): the length budget
   ends within the current 16-byte vector.  A non-empty zero-byte mask in
   %eax means Case2; an empty one falls through into Case3.  */
2342 .p2align 4
2343L(CopyFrom1To16BytesCase2OrCase3):
2344 test %eax, %eax
2345 jnz L(CopyFrom1To16BytesCase2)
2346
/* CopyFrom1To16BytesCase3: no zero byte in the vector, so the number of
   bytes to copy is decided by %ebx alone.  */
2347 .p2align 4
2348L(CopyFrom1To16BytesCase3):
/* Presumably undoes the `sub $16, %ebx' done before branching here.  */
2349 add $16, %ebx
2350 add %esi, %edx
2351 add %esi, %ecx
2352
2353 POP (%esi)
2354
/* More than 8 bytes allowed go to the 9..16 dispatch; otherwise control
   falls through into CopyFrom1To16BytesLess8Case3.  */
2355 cmp $8, %ebx
2356 ja L(ExitHigh8Case3)
2357
/* CopyFrom1To16BytesLess8Case3 (strncpy builds): copy exactly %ebx bytes
   where %ebx <= 8; the length, not a zero byte, decides.  Counts 1..3
   reuse Exit1..Exit3, counts above 4 go to ExitHigh4Case3.  */
2358L(CopyFrom1To16BytesLess8Case3):
2359 cmp $4, %ebx
2360 ja L(ExitHigh4Case3)
2361
2362 cmp $1, %ebx
2363 je L(Exit1)
2364 cmp $2, %ebx
2365 je L(Exit2)
2366 cmp $3, %ebx
2367 je L(Exit3)
/* Remaining case, copied inline as 4 bytes.
   NOTE(review): %ebx == 0 would also land here; presumably callers never
   arrive with an empty budget -- confirm.
   SAVE_RESULT (4) yields %edx + 4 for stpcpy builds (one past the last
   byte written), %edi otherwise.  The code returns without branching to
   the zero-fill routine.  */
2368 movl (%ecx), %eax
2369 movl %eax, (%edx)
2370 SAVE_RESULT (4)
2371 RETURN1
2372
/* ExitHigh4Case3 (strncpy builds): length-limited copy of 5..8 bytes.
   Counts 5..7 reuse Exit5..Exit7; the remaining case (8) is copied
   inline with one 64-bit move.  SAVE_RESULT (8) yields %edx + 8 for
   stpcpy builds, %edi otherwise.  */
2373 .p2align 4
2374L(ExitHigh4Case3):
2375 cmp $5, %ebx
2376 je L(Exit5)
2377 cmp $6, %ebx
2378 je L(Exit6)
2379 cmp $7, %ebx
2380 je L(Exit7)
2381 movlpd (%ecx), %xmm0
2382 movlpd %xmm0, (%edx)
2383 SAVE_RESULT (8)
2384 RETURN1
2385
/* ExitHigh8Case3 (strncpy builds): length-limited copy of 9..16 bytes.
   Counts above 12 go to ExitHigh12Case3, counts 9..11 reuse
   Exit9..Exit11, and the remaining case (12) is copied inline as 8 + 4
   bytes.  SAVE_RESULT (12) yields %edx + 12 for stpcpy builds, %edi
   otherwise.  */
2386 .p2align 4
2387L(ExitHigh8Case3):
2388 cmp $12, %ebx
2389 ja L(ExitHigh12Case3)
2390
2391 cmp $9, %ebx
2392 je L(Exit9)
2393 cmp $10, %ebx
2394 je L(Exit10)
2395 cmp $11, %ebx
2396 je L(Exit11)
2397 movlpd (%ecx), %xmm0
2398 movl 8(%ecx), %eax
2399 movlpd %xmm0, (%edx)
2400 movl %eax, 8(%edx)
2401 SAVE_RESULT (12)
2402 RETURN1
2403
/* ExitHigh12Case3 (strncpy builds): length-limited copy of 13..16 bytes.
   Counts 13..15 reuse Exit13..Exit15; the remaining case (16) is copied
   inline as two 64-bit moves.  SAVE_RESULT (16) yields %edx + 16 for
   stpcpy builds, %edi otherwise.  */
2404 .p2align 4
2405L(ExitHigh12Case3):
2406 cmp $13, %ebx
2407 je L(Exit13)
2408 cmp $14, %ebx
2409 je L(Exit14)
2410 cmp $15, %ebx
2411 je L(Exit15)
2412 movlpd (%ecx), %xmm0
2413 movlpd 8(%ecx), %xmm1
2414 movlpd %xmm0, (%edx)
2415 movlpd %xmm1, 8(%edx)
2416 SAVE_RESULT (16)
2417 RETURN1
2418
2419# endif
2420
/* Exit1: copy the final byte from (%ecx) to (%edx) and return through
   RETURN1 (which pops %edi).  SAVE_RESULT sets %eax to %edx + 0 in
   stpcpy builds and to %edi otherwise.  */
2421 .p2align 4
2422L(Exit1):
2423 movb (%ecx), %al
2424 movb %al, (%edx)
2425 SAVE_RESULT (0)
2426# ifdef USE_AS_STRNCPY
/* Charge 1 byte to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2427 sub $1, %ebx
2428 lea 1(%edx), %ecx
2429 jnz L(StrncpyFillTailWithZero1)
2430# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2431 cmpb $1, (%eax)
2432 sbb $-1, %eax
2433# endif
2434# endif
2435 RETURN1
2436
/* Exit2: copy the final 2 bytes from (%ecx) to (%edx) and return through
   RETURN1 (which pops %edi).  SAVE_RESULT sets %eax to %edx + 1 in
   stpcpy builds and to %edi otherwise.  */
2437 .p2align 4
2438L(Exit2):
2439 movw (%ecx), %ax
2440 movw %ax, (%edx)
2441 SAVE_RESULT (1)
2442# ifdef USE_AS_STRNCPY
/* Charge 2 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2443 sub $2, %ebx
2444 lea 2(%edx), %ecx
2445 jnz L(StrncpyFillTailWithZero1)
2446# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2447 cmpb $1, (%eax)
2448 sbb $-1, %eax
2449# endif
2450# endif
2451 RETURN1
2452
/* Exit3: copy the final 3 bytes (2 + 1) from (%ecx) to (%edx) and return
   through RETURN1 (which pops %edi).  SAVE_RESULT sets %eax to %edx + 2
   in stpcpy builds and to %edi otherwise.  */
2453 .p2align 4
2454L(Exit3):
2455 movw (%ecx), %ax
2456 movw %ax, (%edx)
2457 movb 2(%ecx), %al
2458 movb %al, 2(%edx)
2459 SAVE_RESULT (2)
2460# ifdef USE_AS_STRNCPY
/* Charge 3 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2461 sub $3, %ebx
2462 lea 3(%edx), %ecx
2463 jnz L(StrncpyFillTailWithZero1)
2464# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2465 cmpb $1, (%eax)
2466 sbb $-1, %eax
2467# endif
2468# endif
2469 RETURN1
2470
/* Exit5: copy the final 5 bytes (4 + 1) from (%ecx) to (%edx) and return
   through RETURN1 (which pops %edi).  SAVE_RESULT sets %eax to %edx + 4
   in stpcpy builds and to %edi otherwise.  */
2471 .p2align 4
2472L(Exit5):
2473 movl (%ecx), %eax
2474 movl %eax, (%edx)
2475 movb 4(%ecx), %al
2476 movb %al, 4(%edx)
2477 SAVE_RESULT (4)
2478# ifdef USE_AS_STRNCPY
/* Charge 5 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2479 sub $5, %ebx
2480 lea 5(%edx), %ecx
2481 jnz L(StrncpyFillTailWithZero1)
2482# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2483 cmpb $1, (%eax)
2484 sbb $-1, %eax
2485# endif
2486# endif
2487 RETURN1
2488
/* Exit6: copy the final 6 bytes (4 + 2) from (%ecx) to (%edx) and return
   through RETURN1 (which pops %edi).  SAVE_RESULT sets %eax to %edx + 5
   in stpcpy builds and to %edi otherwise.  */
2489 .p2align 4
2490L(Exit6):
2491 movl (%ecx), %eax
2492 movl %eax, (%edx)
2493 movw 4(%ecx), %ax
2494 movw %ax, 4(%edx)
2495 SAVE_RESULT (5)
2496# ifdef USE_AS_STRNCPY
/* Charge 6 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2497 sub $6, %ebx
2498 lea 6(%edx), %ecx
2499 jnz L(StrncpyFillTailWithZero1)
2500# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2501 cmpb $1, (%eax)
2502 sbb $-1, %eax
2503# endif
2504# endif
2505 RETURN1
2506
/* Exit7: copy the final 7 bytes from (%ecx) to (%edx) using two
   overlapping 4-byte moves (offsets 0 and 3) and return through RETURN1
   (which pops %edi).  SAVE_RESULT sets %eax to %edx + 6 in stpcpy builds
   and to %edi otherwise.  */
2507 .p2align 4
2508L(Exit7):
2509 movl (%ecx), %eax
2510 movl %eax, (%edx)
2511 movl 3(%ecx), %eax
2512 movl %eax, 3(%edx)
2513 SAVE_RESULT (6)
2514# ifdef USE_AS_STRNCPY
/* Charge 7 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2515 sub $7, %ebx
2516 lea 7(%edx), %ecx
2517 jnz L(StrncpyFillTailWithZero1)
2518# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2519 cmpb $1, (%eax)
2520 sbb $-1, %eax
2521# endif
2522# endif
2523 RETURN1
2524
/* Exit9: copy the final 9 bytes (8 + 1) from (%ecx) to (%edx) and return
   through RETURN1 (which pops %edi).  SAVE_RESULT sets %eax to %edx + 8
   in stpcpy builds and to %edi otherwise.  */
2525 .p2align 4
2526L(Exit9):
2527 movlpd (%ecx), %xmm0
2528 movb 8(%ecx), %al
2529 movlpd %xmm0, (%edx)
2530 movb %al, 8(%edx)
2531 SAVE_RESULT (8)
2532# ifdef USE_AS_STRNCPY
/* Charge 9 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2533 sub $9, %ebx
2534 lea 9(%edx), %ecx
2535 jnz L(StrncpyFillTailWithZero1)
2536# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2537 cmpb $1, (%eax)
2538 sbb $-1, %eax
2539# endif
2540# endif
2541 RETURN1
2542
/* Exit10: copy the final 10 bytes (8 + 2) from (%ecx) to (%edx) and
   return through RETURN1 (which pops %edi).  SAVE_RESULT sets %eax to
   %edx + 9 in stpcpy builds and to %edi otherwise.  */
2543 .p2align 4
2544L(Exit10):
2545 movlpd (%ecx), %xmm0
2546 movw 8(%ecx), %ax
2547 movlpd %xmm0, (%edx)
2548 movw %ax, 8(%edx)
2549 SAVE_RESULT (9)
2550# ifdef USE_AS_STRNCPY
/* Charge 10 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2551 sub $10, %ebx
2552 lea 10(%edx), %ecx
2553 jnz L(StrncpyFillTailWithZero1)
2554# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2555 cmpb $1, (%eax)
2556 sbb $-1, %eax
2557# endif
2558# endif
2559 RETURN1
2560
/* Exit11: copy the final 11 bytes from (%ecx) to (%edx) using an 8-byte
   move plus an overlapping 4-byte move at offset 7, and return through
   RETURN1 (which pops %edi).  SAVE_RESULT sets %eax to %edx + 10 in
   stpcpy builds and to %edi otherwise.  */
2561 .p2align 4
2562L(Exit11):
2563 movlpd (%ecx), %xmm0
2564 movl 7(%ecx), %eax
2565 movlpd %xmm0, (%edx)
2566 movl %eax, 7(%edx)
2567 SAVE_RESULT (10)
2568# ifdef USE_AS_STRNCPY
/* Charge 11 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2569 sub $11, %ebx
2570 lea 11(%edx), %ecx
2571 jnz L(StrncpyFillTailWithZero1)
2572# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2573 cmpb $1, (%eax)
2574 sbb $-1, %eax
2575# endif
2576# endif
2577 RETURN1
2578
/* Exit13: copy the final 13 bytes from (%ecx) to (%edx) using two
   overlapping 8-byte moves (offsets 0 and 5) and return through RETURN1
   (which pops %edi).  SAVE_RESULT sets %eax to %edx + 12 in stpcpy
   builds and to %edi otherwise.  */
2579 .p2align 4
2580L(Exit13):
2581 movlpd (%ecx), %xmm0
2582 movlpd 5(%ecx), %xmm1
2583 movlpd %xmm0, (%edx)
2584 movlpd %xmm1, 5(%edx)
2585 SAVE_RESULT (12)
2586# ifdef USE_AS_STRNCPY
/* Charge 13 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2587 sub $13, %ebx
2588 lea 13(%edx), %ecx
2589 jnz L(StrncpyFillTailWithZero1)
2590# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2591 cmpb $1, (%eax)
2592 sbb $-1, %eax
2593# endif
2594# endif
2595 RETURN1
2596
/* Exit14: copy the final 14 bytes from (%ecx) to (%edx) using two
   overlapping 8-byte moves (offsets 0 and 6) and return through RETURN1
   (which pops %edi).  SAVE_RESULT sets %eax to %edx + 13 in stpcpy
   builds and to %edi otherwise.  */
2597 .p2align 4
2598L(Exit14):
2599 movlpd (%ecx), %xmm0
2600 movlpd 6(%ecx), %xmm1
2601 movlpd %xmm0, (%edx)
2602 movlpd %xmm1, 6(%edx)
2603 SAVE_RESULT (13)
2604# ifdef USE_AS_STRNCPY
/* Charge 14 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2605 sub $14, %ebx
2606 lea 14(%edx), %ecx
2607 jnz L(StrncpyFillTailWithZero1)
2608# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2609 cmpb $1, (%eax)
2610 sbb $-1, %eax
2611# endif
2612# endif
2613 RETURN1
2614
/* Exit15: copy the final 15 bytes from (%ecx) to (%edx) using two
   overlapping 8-byte moves (offsets 0 and 7) and return through RETURN1
   (which pops %edi).  SAVE_RESULT sets %eax to %edx + 14 in stpcpy
   builds and to %edi otherwise.  */
2615 .p2align 4
2616L(Exit15):
2617 movlpd (%ecx), %xmm0
2618 movlpd 7(%ecx), %xmm1
2619 movlpd %xmm0, (%edx)
2620 movlpd %xmm1, 7(%edx)
2621 SAVE_RESULT (14)
2622# ifdef USE_AS_STRNCPY
/* Charge 15 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2623 sub $15, %ebx
2624 lea 15(%edx), %ecx
2625 jnz L(StrncpyFillTailWithZero1)
2626# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2627 cmpb $1, (%eax)
2628 sbb $-1, %eax
2629# endif
2630# endif
2631 RETURN1
2632
/* Unwind info only: RETURN1 ends by re-declaring %edi as pushed; the
   code that follows runs without %edi on the stack.  */
2633CFI_POP (%edi)
2634
2635# ifdef USE_AS_STRNCPY
/* Fill0 .. Fill16 (strncpy builds): write exactly K zero bytes at (%ecx)
   and return through RETURN.  The zero-fill code that dispatches here
   clears %edx and %xmm0 beforehand, so %dl/%dx/%edx and the low 8 bytes
   of %xmm0 are the zero sources.  Sizes that are not a single store
   width use two stores, overlapping where needed (e.g. Fill7 = 4 bytes
   at offset 0 plus 4 bytes at offset 3).  %eax is not touched, so the
   return value prepared earlier is preserved.  */
2636 .p2align 4
2637L(Fill0):
2638 RETURN
2639
2640 .p2align 4
2641L(Fill1):
2642 movb %dl, (%ecx)
2643 RETURN
2644
2645 .p2align 4
2646L(Fill2):
2647 movw %dx, (%ecx)
2648 RETURN
2649
2650 .p2align 4
2651L(Fill3):
2652 movw %dx, (%ecx)
2653 movb %dl, 2(%ecx)
2654 RETURN
2655
2656 .p2align 4
2657L(Fill4):
2658 movl %edx, (%ecx)
2659 RETURN
2660
2661 .p2align 4
2662L(Fill5):
2663 movl %edx, (%ecx)
2664 movb %dl, 4(%ecx)
2665 RETURN
2666
2667 .p2align 4
2668L(Fill6):
2669 movl %edx, (%ecx)
2670 movw %dx, 4(%ecx)
2671 RETURN
2672
/* Overlapping stores: bytes 0..3 and 3..6.  */
2673 .p2align 4
2674L(Fill7):
2675 movl %edx, (%ecx)
2676 movl %edx, 3(%ecx)
2677 RETURN
2678
2679 .p2align 4
2680L(Fill8):
2681 movlpd %xmm0, (%ecx)
2682 RETURN
2683
2684 .p2align 4
2685L(Fill9):
2686 movlpd %xmm0, (%ecx)
2687 movb %dl, 8(%ecx)
2688 RETURN
2689
2690 .p2align 4
2691L(Fill10):
2692 movlpd %xmm0, (%ecx)
2693 movw %dx, 8(%ecx)
2694 RETURN
2695
/* Overlapping stores: bytes 0..7 and 7..10.  */
2696 .p2align 4
2697L(Fill11):
2698 movlpd %xmm0, (%ecx)
2699 movl %edx, 7(%ecx)
2700 RETURN
2701
2702 .p2align 4
2703L(Fill12):
2704 movlpd %xmm0, (%ecx)
2705 movl %edx, 8(%ecx)
2706 RETURN
2707
/* Fill13..Fill15: two overlapping 8-byte stores, the second ending at
   the last byte to clear.  */
2708 .p2align 4
2709L(Fill13):
2710 movlpd %xmm0, (%ecx)
2711 movlpd %xmm0, 5(%ecx)
2712 RETURN
2713
2714 .p2align 4
2715L(Fill14):
2716 movlpd %xmm0, (%ecx)
2717 movlpd %xmm0, 6(%ecx)
2718 RETURN
2719
2720 .p2align 4
2721L(Fill15):
2722 movlpd %xmm0, (%ecx)
2723 movlpd %xmm0, 7(%ecx)
2724 RETURN
2725
2726 .p2align 4
2727L(Fill16):
2728 movlpd %xmm0, (%ecx)
2729 movlpd %xmm0, 8(%ecx)
2730 RETURN
2731
/* StrncpyFillExit1 / FillFrom1To16Bytes (strncpy builds): dispatch to
   the FillK routine that clears the last %ebx bytes at (%ecx).
   StrncpyFillExit1 is entered with %ebx already reduced by 16 and adds
   that back first; FillFrom1To16Bytes is entered with the true count.
   The comparisons cover counts 0..16.  Each cmp is followed by several
   conditional jumps that all test the flags of that one cmp.  */
2732 .p2align 4
2733L(StrncpyFillExit1):
2734 lea 16(%ebx), %ebx
2735L(FillFrom1To16Bytes):
2736 test %ebx, %ebx
2737 jz L(Fill0)
2738 cmp $16, %ebx
2739 je L(Fill16)
2740 cmp $8, %ebx
2741 je L(Fill8)
2742 jg L(FillMore8)
2743 cmp $4, %ebx
2744 je L(Fill4)
2745 jg L(FillMore4)
/* Here the count is 1, 2 or 3.  */
2746 cmp $2, %ebx
2747 jl L(Fill1)
2748 je L(Fill2)
2749 jg L(Fill3)
2750L(FillMore8): /* but less than 16 */
2751 cmp $12, %ebx
2752 je L(Fill12)
2753 jl L(FillLess12)
/* Here the count is 13, 14 or 15.  */
2754 cmp $14, %ebx
2755 jl L(Fill13)
2756 je L(Fill14)
2757 jg L(Fill15)
2758L(FillMore4): /* but less than 8 */
2759 cmp $6, %ebx
2760 jl L(Fill5)
2761 je L(Fill6)
2762 jg L(Fill7)
2763L(FillLess12): /* but more than 8 */
2764 cmp $10, %ebx
2765 jl L(Fill9)
2766 je L(Fill10)
2767 jmp L(Fill11)
2768
/* Unwind info only: StrncpyFillTailWithZero1 is entered with %edi still
   pushed.  */
2769 CFI_PUSH(%edi)
2770
/* StrncpyFillTailWithZero1 / StrncpyFillTailWithZero (strncpy builds):
   clear the %ebx bytes that start at %ecx.  The "1" entry first pops
   %edi; the plain entry is for paths that did not push it.  %eax is not
   touched.  */
2771 .p2align 4
2772L(StrncpyFillTailWithZero1):
2773 POP (%edi)
2774L(StrncpyFillTailWithZero):
/* Zero sources for every store below and for the FillK routines.  */
2775 pxor %xmm0, %xmm0
2776 xor %edx, %edx
/* 16 bytes or fewer: StrncpyFillExit1 adds the 16 back and dispatches.  */
2777 sub $16, %ebx
2778 jbe L(StrncpyFillExit1)
2779
/* Clear the first 16 bytes with unaligned 8-byte stores.  */
2780 movlpd %xmm0, (%ecx)
2781 movlpd %xmm0, 8(%ecx)
2782
2783 lea 16(%ecx), %ecx
2784
/* Round %ecx down to a 16-byte boundary so movdqa can be used; the bytes
   stepped back over (already cleared) are added to the count again.
   %edx is scratch here and is re-zeroed afterwards.  */
2785 mov %ecx, %edx
2786 and $0xf, %edx
2787 sub %edx, %ecx
2788 add %edx, %ebx
2789 xor %edx, %edx
/* From here on %ebx is kept biased by -64 while the loop runs.  */
2790 sub $64, %ebx
2791 jb L(StrncpyFillLess64)
2792
/* Clear 64 aligned bytes per iteration while at least 64 remain.  */
2793L(StrncpyFillLoopMovdqa):
2794 movdqa %xmm0, (%ecx)
2795 movdqa %xmm0, 16(%ecx)
2796 movdqa %xmm0, 32(%ecx)
2797 movdqa %xmm0, 48(%ecx)
2798 lea 64(%ecx), %ecx
2799 sub $64, %ebx
2800 jae L(StrncpyFillLoopMovdqa)
2801
/* Fewer than 64 bytes remain; %ebx = remaining - 64.  After the add it
   is remaining - 32: negative means fewer than 32 are left.  */
2802L(StrncpyFillLess64):
2803 add $32, %ebx
2804 jl L(StrncpyFillLess32)
2805 movdqa %xmm0, (%ecx)
2806 movdqa %xmm0, 16(%ecx)
2807 lea 32(%ecx), %ecx
/* Fewer than 16 left after those 32: StrncpyFillExit1 restores the
   count; otherwise clear one more vector and finish the last 0..15.  */
2808 sub $16, %ebx
2809 jl L(StrncpyFillExit1)
2810 movdqa %xmm0, (%ecx)
2811 lea 16(%ecx), %ecx
2812 jmp L(FillFrom1To16Bytes)
2813
/* Fewer than 32 bytes remain; %ebx = remaining - 32.  After the add it
   is remaining - 16: negative means fewer than 16 are left.  */
2814L(StrncpyFillLess32):
2815 add $16, %ebx
2816 jl L(StrncpyFillExit1)
2817 movdqa %xmm0, (%ecx)
2818 lea 16(%ecx), %ecx
2819 jmp L(FillFrom1To16Bytes)
2820# endif
2821
/* ExitTail1: copy the final byte from (%ecx) to (%edx) and return
   through RETURN, which does not pop %edi.  SAVE_RESULT_TAIL sets %eax
   to %edx + 0 in stpcpy builds and to %edx otherwise.  */
2822 .p2align 4
2823L(ExitTail1):
2824 movb (%ecx), %al
2825 movb %al, (%edx)
2826 SAVE_RESULT_TAIL (0)
2827# ifdef USE_AS_STRNCPY
/* Charge 1 byte to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2828 sub $1, %ebx
2829 lea 1(%edx), %ecx
2830 jnz L(StrncpyFillTailWithZero)
2831# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2832 cmpb $1, (%eax)
2833 sbb $-1, %eax
2834# endif
2835# endif
2836 RETURN
2837
/* ExitTail2: copy the final 2 bytes from (%ecx) to (%edx) and return
   through RETURN, which does not pop %edi.  SAVE_RESULT_TAIL sets %eax
   to %edx + 1 in stpcpy builds and to %edx otherwise.  */
2838 .p2align 4
2839L(ExitTail2):
2840 movw (%ecx), %ax
2841 movw %ax, (%edx)
2842 SAVE_RESULT_TAIL (1)
2843# ifdef USE_AS_STRNCPY
/* Charge 2 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2844 sub $2, %ebx
2845 lea 2(%edx), %ecx
2846 jnz L(StrncpyFillTailWithZero)
2847# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2848 cmpb $1, (%eax)
2849 sbb $-1, %eax
2850# endif
2851# endif
2852 RETURN
2853
/* ExitTail3: copy the final 3 bytes (2 + 1) from (%ecx) to (%edx) and
   return through RETURN, which does not pop %edi.  SAVE_RESULT_TAIL sets
   %eax to %edx + 2 in stpcpy builds and to %edx otherwise.  */
2854 .p2align 4
2855L(ExitTail3):
2856 movw (%ecx), %ax
2857 movw %ax, (%edx)
2858 movb 2(%ecx), %al
2859 movb %al, 2(%edx)
2860 SAVE_RESULT_TAIL (2)
2861# ifdef USE_AS_STRNCPY
/* Charge 3 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2862 sub $3, %ebx
2863 lea 3(%edx), %ecx
2864 jnz L(StrncpyFillTailWithZero)
2865# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2866 cmpb $1, (%eax)
2867 sbb $-1, %eax
2868# endif
2869# endif
2870 RETURN
2871
/* ExitTail4: copy the final 4 bytes from (%ecx) to (%edx) and return
   through RETURN, which does not pop %edi.  SAVE_RESULT_TAIL sets %eax
   to %edx + 3 in stpcpy builds and to %edx otherwise.  */
2872 .p2align 4
2873L(ExitTail4):
2874 movl (%ecx), %eax
2875 movl %eax, (%edx)
2876 SAVE_RESULT_TAIL (3)
2877# ifdef USE_AS_STRNCPY
/* Charge 4 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2878 sub $4, %ebx
2879 lea 4(%edx), %ecx
2880 jnz L(StrncpyFillTailWithZero)
2881# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2882 cmpb $1, (%eax)
2883 sbb $-1, %eax
2884# endif
2885# endif
2886 RETURN
2887
/* ExitTail5: copy the final 5 bytes (4 + 1) from (%ecx) to (%edx) and
   return through RETURN, which does not pop %edi.  SAVE_RESULT_TAIL sets
   %eax to %edx + 4 in stpcpy builds and to %edx otherwise.  */
2888 .p2align 4
2889L(ExitTail5):
2890 movl (%ecx), %eax
2891 movl %eax, (%edx)
2892 movb 4(%ecx), %al
2893 movb %al, 4(%edx)
2894 SAVE_RESULT_TAIL (4)
2895# ifdef USE_AS_STRNCPY
/* Charge 5 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2896 sub $5, %ebx
2897 lea 5(%edx), %ecx
2898 jnz L(StrncpyFillTailWithZero)
2899# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2900 cmpb $1, (%eax)
2901 sbb $-1, %eax
2902# endif
2903# endif
2904 RETURN
2905
/* ExitTail6: copy the final 6 bytes (4 + 2) from (%ecx) to (%edx) and
   return through RETURN, which does not pop %edi.  SAVE_RESULT_TAIL sets
   %eax to %edx + 5 in stpcpy builds and to %edx otherwise.  */
2906 .p2align 4
2907L(ExitTail6):
2908 movl (%ecx), %eax
2909 movl %eax, (%edx)
2910 movw 4(%ecx), %ax
2911 movw %ax, 4(%edx)
2912 SAVE_RESULT_TAIL (5)
2913# ifdef USE_AS_STRNCPY
/* Charge 6 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2914 sub $6, %ebx
2915 lea 6(%edx), %ecx
2916 jnz L(StrncpyFillTailWithZero)
2917# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2918 cmpb $1, (%eax)
2919 sbb $-1, %eax
2920# endif
2921# endif
2922 RETURN
2923
/* ExitTail7: copy the final 7 bytes from (%ecx) to (%edx) using two
   overlapping 4-byte moves (offsets 0 and 3) and return through RETURN,
   which does not pop %edi.  SAVE_RESULT_TAIL sets %eax to %edx + 6 in
   stpcpy builds and to %edx otherwise.  */
2924 .p2align 4
2925L(ExitTail7):
2926 movl (%ecx), %eax
2927 movl %eax, (%edx)
2928 movl 3(%ecx), %eax
2929 movl %eax, 3(%edx)
2930 SAVE_RESULT_TAIL (6)
2931# ifdef USE_AS_STRNCPY
/* Charge 7 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2932 sub $7, %ebx
2933 lea 7(%edx), %ecx
2934 jnz L(StrncpyFillTailWithZero)
2935# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2936 cmpb $1, (%eax)
2937 sbb $-1, %eax
2938# endif
2939# endif
2940 RETURN
2941
/* ExitTail8: copy the final 8 bytes from (%ecx) to (%edx) with one
   64-bit SSE move and return through RETURN, which does not pop %edi.
   SAVE_RESULT_TAIL sets %eax to %edx + 7 in stpcpy builds and to %edx
   otherwise.
   NOTE(review): unlike the neighbouring ExitTailK blocks there is no
   `cmpb $1, (%eax); sbb $-1, %eax' stpncpy adjustment here.  Presumably
   this label is only reached when byte 7 is the terminator, in which
   case %edx + 7 is already the right result -- confirm against the
   entry code that jumps here.  */
2942 .p2align 4
2943L(ExitTail8):
2944 movlpd (%ecx), %xmm0
2945 movlpd %xmm0, (%edx)
2946 SAVE_RESULT_TAIL (7)
2947# ifdef USE_AS_STRNCPY
/* Charge 8 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2948 sub $8, %ebx
2949 lea 8(%edx), %ecx
2950 jnz L(StrncpyFillTailWithZero)
2951# endif
2952 RETURN
2953
/* ExitTail9: copy the final 9 bytes (8 + 1) from (%ecx) to (%edx) and
   return through RETURN, which does not pop %edi.  SAVE_RESULT_TAIL sets
   %eax to %edx + 8 in stpcpy builds and to %edx otherwise.  */
2954 .p2align 4
2955L(ExitTail9):
2956 movlpd (%ecx), %xmm0
2957 movb 8(%ecx), %al
2958 movlpd %xmm0, (%edx)
2959 movb %al, 8(%edx)
2960 SAVE_RESULT_TAIL (8)
2961# ifdef USE_AS_STRNCPY
/* Charge 9 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2962 sub $9, %ebx
2963 lea 9(%edx), %ecx
2964 jnz L(StrncpyFillTailWithZero)
2965# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2966 cmpb $1, (%eax)
2967 sbb $-1, %eax
2968# endif
2969# endif
2970 RETURN
2971
/* ExitTail10: copy the final 10 bytes (8 + 2) from (%ecx) to (%edx) and
   return through RETURN, which does not pop %edi.  SAVE_RESULT_TAIL sets
   %eax to %edx + 9 in stpcpy builds and to %edx otherwise.  */
2972 .p2align 4
2973L(ExitTail10):
2974 movlpd (%ecx), %xmm0
2975 movw 8(%ecx), %ax
2976 movlpd %xmm0, (%edx)
2977 movw %ax, 8(%edx)
2978 SAVE_RESULT_TAIL (9)
2979# ifdef USE_AS_STRNCPY
/* Charge 10 bytes to the budget; %ecx becomes the zero-fill start.  lea
   leaves EFLAGS alone, so jnz still tests the result of the sub.  */
2980 sub $10, %ebx
2981 lea 10(%edx), %ecx
2982 jnz L(StrncpyFillTailWithZero)
2983# ifdef USE_AS_STPCPY
/* Budget exactly used up: advance %eax by one unless the last copied
   byte is zero (cmpb sets CF for a zero byte; sbb $-1 adds 1 - CF).  */
2984 cmpb $1, (%eax)
2985 sbb $-1, %eax
2986# endif
2987# endif
2988 RETURN
2989
2990 .p2align 4
/* ExitTail11: copy 11 bytes from (%ecx) to (%edx) with an 8-byte move
   (offsets 0..7) and a dword move (offsets 7..10) that overlap by one
   byte, set the return value with SAVE_RESULT_TAIL and return.  */
2991L(ExitTail11):
2992 movlpd (%ecx), %xmm0
2993 movl 7(%ecx), %eax
2994 movlpd %xmm0, (%edx)
2995 movl %eax, 7(%edx)
2996 SAVE_RESULT_TAIL (10)
2997# ifdef USE_AS_STRNCPY
/* %ebx -= 11 (presumably the remaining strncpy length -- confirm).  The
   lea does not touch the flags, so jnz tests the result of the sub; a
   non-zero result continues in StrncpyFillTailWithZero with
   %ecx = %edx + 11.  */
2998 sub $11, %ebx
2999 lea 11(%edx), %ecx
3000 jnz L(StrncpyFillTailWithZero)
3001# ifdef USE_AS_STPCPY
/* %eax = %edx + 10 (from SAVE_RESULT_TAIL); step it forward by one
   unless the byte it points at is NUL.  */
3002 cmpb $1, (%eax)
3003 sbb $-1, %eax
3004# endif
3005# endif
3006 RETURN
3007
3008 .p2align 4
/* ExitTail12: copy 12 bytes from (%ecx) to (%edx) as one 8-byte move
   plus one dword, set the return value with SAVE_RESULT_TAIL and
   return.  Both loads are issued before the stores.  */
3009L(ExitTail12):
3010 movlpd (%ecx), %xmm0
3011 movl 8(%ecx), %eax
3012 movlpd %xmm0, (%edx)
3013 movl %eax, 8(%edx)
3014 SAVE_RESULT_TAIL (11)
3015# ifdef USE_AS_STRNCPY
/* %ebx -= 12 (presumably the remaining strncpy length -- confirm).  The
   lea does not touch the flags, so jnz tests the result of the sub; a
   non-zero result continues in StrncpyFillTailWithZero with
   %ecx = %edx + 12.  */
3016 sub $12, %ebx
3017 lea 12(%edx), %ecx
3018 jnz L(StrncpyFillTailWithZero)
3019# ifdef USE_AS_STPCPY
/* %eax = %edx + 11 (from SAVE_RESULT_TAIL); step it forward by one
   unless the byte it points at is NUL.  */
3020 cmpb $1, (%eax)
3021 sbb $-1, %eax
3022# endif
3023# endif
3024 RETURN
3025
3026 .p2align 4
/* ExitTail13: copy 13 bytes from (%ecx) to (%edx) with two 8-byte moves
   that overlap (offsets 0..7 and 5..12), set the return value with
   SAVE_RESULT_TAIL and return.  */
3027L(ExitTail13):
3028 movlpd (%ecx), %xmm0
3029 movlpd 5(%ecx), %xmm1
3030 movlpd %xmm0, (%edx)
3031 movlpd %xmm1, 5(%edx)
3032 SAVE_RESULT_TAIL (12)
3033# ifdef USE_AS_STRNCPY
/* %ebx -= 13 (presumably the remaining strncpy length -- confirm).  The
   lea does not touch the flags, so jnz tests the result of the sub; a
   non-zero result continues in StrncpyFillTailWithZero with
   %ecx = %edx + 13.  */
3034 sub $13, %ebx
3035 lea 13(%edx), %ecx
3036 jnz L(StrncpyFillTailWithZero)
3037# ifdef USE_AS_STPCPY
/* %eax = %edx + 12 (from SAVE_RESULT_TAIL); step it forward by one
   unless the byte it points at is NUL.  */
3038 cmpb $1, (%eax)
3039 sbb $-1, %eax
3040# endif
3041# endif
3042 RETURN
3043
3044 .p2align 4
/* ExitTail14: copy 14 bytes from (%ecx) to (%edx) with two 8-byte moves
   that overlap (offsets 0..7 and 6..13), set the return value with
   SAVE_RESULT_TAIL and return.  */
3045L(ExitTail14):
3046 movlpd (%ecx), %xmm0
3047 movlpd 6(%ecx), %xmm1
3048 movlpd %xmm0, (%edx)
3049 movlpd %xmm1, 6(%edx)
3050 SAVE_RESULT_TAIL (13)
3051# ifdef USE_AS_STRNCPY
/* %ebx -= 14 (presumably the remaining strncpy length -- confirm).  The
   lea does not touch the flags, so jnz tests the result of the sub; a
   non-zero result continues in StrncpyFillTailWithZero with
   %ecx = %edx + 14.  */
3052 sub $14, %ebx
3053 lea 14(%edx), %ecx
3054 jnz L(StrncpyFillTailWithZero)
3055# ifdef USE_AS_STPCPY
/* %eax = %edx + 13 (from SAVE_RESULT_TAIL); step it forward by one
   unless the byte it points at is NUL.  */
3056 cmpb $1, (%eax)
3057 sbb $-1, %eax
3058# endif
3059# endif
3060 RETURN
3061
3062 .p2align 4
/* ExitTail15: copy 15 bytes from (%ecx) to (%edx) with two 8-byte moves
   that overlap by one byte (offsets 0..7 and 7..14), set the return
   value with SAVE_RESULT_TAIL and return.
   NOTE(review): there is no cmpb/sbb stpcpy fix-up in this tail, so for
   stpcpy builds %eax stays at %edx + 14.  Presumably this label is only
   reached when byte 14 is the terminating NUL -- confirm against the
   jump sites.  */
3063L(ExitTail15):
3064 movlpd (%ecx), %xmm0
3065 movlpd 7(%ecx), %xmm1
3066 movlpd %xmm0, (%edx)
3067 movlpd %xmm1, 7(%edx)
3068 SAVE_RESULT_TAIL (14)
3069# ifdef USE_AS_STRNCPY
/* %ebx -= 15 (presumably the remaining strncpy length -- confirm).  The
   lea does not touch the flags, so jnz tests the result of the sub; a
   non-zero result continues in StrncpyFillTailWithZero with
   %ecx = %edx + 15.  */
3070 sub $15, %ebx
3071 lea 15(%edx), %ecx
3072 jnz L(StrncpyFillTailWithZero)
3073# endif
3074 RETURN
3075
3076 .p2align 4
/* ExitTail16: copy 16 bytes from (%ecx) to (%edx) with one unaligned
   movdqu pair, set the return value with SAVE_RESULT_TAIL and return.  */
3077L(ExitTail16):
3078 movdqu (%ecx), %xmm0
3079 movdqu %xmm0, (%edx)
3080 SAVE_RESULT_TAIL (15)
3081# ifdef USE_AS_STRNCPY
/* %ebx -= 16 (presumably the remaining strncpy length -- confirm).  The
   lea does not touch the flags, so jnz tests the result of the sub; a
   non-zero result continues in StrncpyFillTailWithZero with
   %ecx = %edx + 16.  */
3082 sub $16, %ebx
3083 lea 16(%edx), %ecx
3084 jnz L(StrncpyFillTailWithZero)
3085# ifdef USE_AS_STPCPY
/* %eax = %edx + 15 (from SAVE_RESULT_TAIL); step it forward by one
   unless the byte it points at is NUL.  */
3086 cmpb $1, (%eax)
3087 sbb $-1, %eax
3088# endif
3089# endif
3090 RETURN
3091# endif
3092
3093# ifdef USE_AS_STRNCPY
3094# ifndef USE_AS_STRCAT
/* Unwind annotations only: CFI_PUSH expands to cfi_adjust_cfa_offset (4)
   and cfi_rel_offset (REG, 0), so no push instruction is emitted here.
   They describe %esi and %edi as saved on the stack for the code that
   follows (presumably pushed by the paths that jump into it --
   confirm).  */
3095 CFI_PUSH (%esi)
3096 CFI_PUSH (%edi)
3097# endif
3098 .p2align 4
/* StrncpyLeaveCase2OrCase3: dispatch on %eax.  A non-zero %eax goes to
   Aligned64LeaveCase2; zero falls through to Aligned64LeaveCase3.
   %eax is presumably a NUL-byte mask produced by the code that jumps
   here -- confirm.  */
3099L(StrncpyLeaveCase2OrCase3):
3100 test %eax, %eax
3101 jnz L(Aligned64LeaveCase2)
3102
/* Aligned64LeaveCase3: store %xmm4, %xmm5 and %xmm6 at -64, -48 and
   -32(%edx) for as long as %ebx allows, then continue in
   CopyFrom1To16BytesCase3.  %ebx is first biased by +48 and then
   reduced by 16 after each store; %esi grows by 16 per store.  No NUL
   checks are made on this path.  */
3103L(Aligned64LeaveCase3):
3104 add $48, %ebx
3105 jle L(CopyFrom1To16BytesCase3)
3106 movaps %xmm4, -64(%edx)
3107 lea 16(%esi), %esi
3108 sub $16, %ebx
3109 jbe L(CopyFrom1To16BytesCase3)
3110 movaps %xmm5, -48(%edx)
3111 lea 16(%esi), %esi
3112 sub $16, %ebx
3113 jbe L(CopyFrom1To16BytesCase3)
3114 movaps %xmm6, -32(%edx)
3115 lea 16(%esi), %esi
3116 lea -16(%ebx), %ebx
3117 jmp L(CopyFrom1To16BytesCase3)
3118
/* Aligned64LeaveCase2: walk %xmm4, %xmm5, %xmm6 and %xmm7 in order.
   For each one, build a byte-equality mask against %xmm0 in %eax
   (%xmm0 is presumably all zero on entry, making this a NUL mask --
   confirm), check the count in %ebx, then check the mask.  A vector is
   stored only after the checks on it have passed, i.e. while the next
   one is being examined.
   Exits: CopyFrom1To16BytesCase2OrCase3 when %ebx is used up,
   CopyFrom1To16Bytes when the mask is non-zero, and
   CopyFrom1To16BytesCase2 after the fourth mask has been computed.  */
3119L(Aligned64LeaveCase2):
3120 pcmpeqb %xmm4, %xmm0
3121 pmovmskb %xmm0, %eax
3122 add $48, %ebx
3123 jle L(CopyFrom1To16BytesCase2OrCase3)
3124 test %eax, %eax
3125 jnz L(CopyFrom1To16Bytes)
3126
/* Falling through means the mask was zero, so every byte of %xmm0 is
   0x00 again and it can be reused as the comparand.  */
3127 pcmpeqb %xmm5, %xmm0
3128 pmovmskb %xmm0, %eax
3129 movaps %xmm4, -64(%edx)
3130 lea 16(%esi), %esi
3131 sub $16, %ebx
3132 jbe L(CopyFrom1To16BytesCase2OrCase3)
3133 test %eax, %eax
3134 jnz L(CopyFrom1To16Bytes)
3135
3136 pcmpeqb %xmm6, %xmm0
3137 pmovmskb %xmm0, %eax
3138 movaps %xmm5, -48(%edx)
3139 lea 16(%esi), %esi
3140 sub $16, %ebx
3141 jbe L(CopyFrom1To16BytesCase2OrCase3)
3142 test %eax, %eax
3143 jnz L(CopyFrom1To16Bytes)
3144
/* Last vector: the mask for %xmm7 is left in %eax for
   CopyFrom1To16BytesCase2; %xmm7 itself is not stored here.  */
3145 pcmpeqb %xmm7, %xmm0
3146 pmovmskb %xmm0, %eax
3147 movaps %xmm6, -32(%edx)
3148 lea 16(%esi), %esi
3149 lea -16(%ebx), %ebx
3150 jmp L(CopyFrom1To16BytesCase2)
3151
3152/*--------------------------------------------------*/
3153 .p2align 4
/* StrncpyExit1Case2OrCase3: copy offsets 0..14 from (%ecx) to (%edx)
   with two 8-byte moves that overlap by one byte, set %esi to 15, then
   dispatch on %eax: non-zero goes to CopyFrom1To16BytesCase2, zero to
   CopyFrom1To16BytesCase3.  %eax is presumably a NUL-byte mask from the
   caller -- confirm.  */
3154L(StrncpyExit1Case2OrCase3):
3155 movlpd (%ecx), %xmm0
3156 movlpd 7(%ecx), %xmm1
3157 movlpd %xmm0, (%edx)
3158 movlpd %xmm1, 7(%edx)
3159 mov $15, %esi
3160 test %eax, %eax
3161 jnz L(CopyFrom1To16BytesCase2)
3162 jmp L(CopyFrom1To16BytesCase3)
3163
3164 .p2align 4
/* StrncpyExit2Case2OrCase3: copy offsets 0..13 from (%ecx) to (%edx)
   with two overlapping 8-byte moves (0..7 and 6..13), set %esi to 14,
   then dispatch on %eax: non-zero goes to CopyFrom1To16BytesCase2, zero
   to CopyFrom1To16BytesCase3.  %eax is presumably a NUL-byte mask from
   the caller -- confirm.  */
3165L(StrncpyExit2Case2OrCase3):
3166 movlpd (%ecx), %xmm0
3167 movlpd 6(%ecx), %xmm1
3168 movlpd %xmm0, (%edx)
3169 movlpd %xmm1, 6(%edx)
3170 mov $14, %esi
3171 test %eax, %eax
3172 jnz L(CopyFrom1To16BytesCase2)
3173 jmp L(CopyFrom1To16BytesCase3)
3174
3175 .p2align 4
/* StrncpyExit3Case2OrCase3: copy offsets 0..12 from (%ecx) to (%edx)
   with two overlapping 8-byte moves (0..7 and 5..12), set %esi to 13,
   then dispatch on %eax: non-zero goes to CopyFrom1To16BytesCase2, zero
   to CopyFrom1To16BytesCase3.  %eax is presumably a NUL-byte mask from
   the caller -- confirm.  */
3176L(StrncpyExit3Case2OrCase3):
3177 movlpd (%ecx), %xmm0
3178 movlpd 5(%ecx), %xmm1
3179 movlpd %xmm0, (%edx)
3180 movlpd %xmm1, 5(%edx)
3181 mov $13, %esi
3182 test %eax, %eax
3183 jnz L(CopyFrom1To16BytesCase2)
3184 jmp L(CopyFrom1To16BytesCase3)
3185
3186 .p2align 4
/* StrncpyExit4Case2OrCase3: copy offsets 0..11 from (%ecx) to (%edx) as
   one 8-byte move plus one dword (%esi is the scratch register for the
   dword), set %esi to 12, then dispatch on %eax: non-zero goes to
   CopyFrom1To16BytesCase2, zero to CopyFrom1To16BytesCase3.  %eax is
   presumably a NUL-byte mask from the caller -- confirm.  */
3187L(StrncpyExit4Case2OrCase3):
3188 movlpd (%ecx), %xmm0
3189 movl 8(%ecx), %esi
3190 movlpd %xmm0, (%edx)
3191 movl %esi, 8(%edx)
3192 mov $12, %esi
3193 test %eax, %eax
3194 jnz L(CopyFrom1To16BytesCase2)
3195 jmp L(CopyFrom1To16BytesCase3)
3196
3197 .p2align 4
/* StrncpyExit5Case2OrCase3: copy offsets 0..10 from (%ecx) to (%edx)
   with an 8-byte move and a dword at offset 7 that overlap by one byte
   (%esi is the scratch register for the dword), set %esi to 11, then
   dispatch on %eax: non-zero goes to CopyFrom1To16BytesCase2, zero to
   CopyFrom1To16BytesCase3.  %eax is presumably a NUL-byte mask from the
   caller -- confirm.  */
3198L(StrncpyExit5Case2OrCase3):
3199 movlpd (%ecx), %xmm0
3200 movl 7(%ecx), %esi
3201 movlpd %xmm0, (%edx)
3202 movl %esi, 7(%edx)
3203 mov $11, %esi
3204 test %eax, %eax
3205 jnz L(CopyFrom1To16BytesCase2)
3206 jmp L(CopyFrom1To16BytesCase3)
3207
3208 .p2align 4
/* StrncpyExit6Case2OrCase3: copy offsets 0..9 from (%ecx) to (%edx)
   with an 8-byte move and an overlapping dword at offset 6 (%esi is the
   scratch register for the dword), set %esi to 10, then dispatch on
   %eax: non-zero goes to CopyFrom1To16BytesCase2, zero to
   CopyFrom1To16BytesCase3.  %eax is presumably a NUL-byte mask from the
   caller -- confirm.  */
3209L(StrncpyExit6Case2OrCase3):
3210 movlpd (%ecx), %xmm0
3211 movl 6(%ecx), %esi
3212 movlpd %xmm0, (%edx)
3213 movl %esi, 6(%edx)
3214 mov $10, %esi
3215 test %eax, %eax
3216 jnz L(CopyFrom1To16BytesCase2)
3217 jmp L(CopyFrom1To16BytesCase3)
3218
3219 .p2align 4
/* StrncpyExit7Case2OrCase3: copy offsets 0..8 from (%ecx) to (%edx)
   with an 8-byte move and an overlapping dword at offset 5 (%esi is the
   scratch register for the dword), set %esi to 9, then dispatch on
   %eax: non-zero goes to CopyFrom1To16BytesCase2, zero to
   CopyFrom1To16BytesCase3.  %eax is presumably a NUL-byte mask from the
   caller -- confirm.  */
3220L(StrncpyExit7Case2OrCase3):
3221 movlpd (%ecx), %xmm0
3222 movl 5(%ecx), %esi
3223 movlpd %xmm0, (%edx)
3224 movl %esi, 5(%edx)
3225 mov $9, %esi
3226 test %eax, %eax
3227 jnz L(CopyFrom1To16BytesCase2)
3228 jmp L(CopyFrom1To16BytesCase3)
3229
3230 .p2align 4
/* StrncpyExit8Case2OrCase3: copy offsets 0..7 from (%ecx) to (%edx)
   with one 8-byte move, set %esi to 8, then dispatch on %eax: non-zero
   goes to CopyFrom1To16BytesCase2, zero to CopyFrom1To16BytesCase3.
   %eax is presumably a NUL-byte mask from the caller -- confirm.  */
3231L(StrncpyExit8Case2OrCase3):
3232 movlpd (%ecx), %xmm0
3233 movlpd %xmm0, (%edx)
3234 mov $8, %esi
3235 test %eax, %eax
3236 jnz L(CopyFrom1To16BytesCase2)
3237 jmp L(CopyFrom1To16BytesCase3)
3238
3239 .p2align 4
/* StrncpyExit9Case2OrCase3: copy offsets 0..7 from (%ecx) to (%edx)
   with one 8-byte move and set %esi to 7 (the move covers one byte more
   than %esi counts), then dispatch on %eax: non-zero goes to
   CopyFrom1To16BytesCase2, zero to CopyFrom1To16BytesCase3.  %eax is
   presumably a NUL-byte mask from the caller -- confirm.  */
3240L(StrncpyExit9Case2OrCase3):
3241 movlpd (%ecx), %xmm0
3242 movlpd %xmm0, (%edx)
3243 mov $7, %esi
3244 test %eax, %eax
3245 jnz L(CopyFrom1To16BytesCase2)
3246 jmp L(CopyFrom1To16BytesCase3)
3247
3248 .p2align 4
/* StrncpyExit10Case2OrCase3: copy offsets -1..6 from (%ecx) to (%edx)
   with one 8-byte move (it starts one byte before both pointers) and
   set %esi to 6, then dispatch on %eax: non-zero goes to
   CopyFrom1To16BytesCase2, zero to CopyFrom1To16BytesCase3.  %eax is
   presumably a NUL-byte mask from the caller -- confirm.  */
3249L(StrncpyExit10Case2OrCase3):
3250 movlpd -1(%ecx), %xmm0
3251 movlpd %xmm0, -1(%edx)
3252 mov $6, %esi
3253 test %eax, %eax
3254 jnz L(CopyFrom1To16BytesCase2)
3255 jmp L(CopyFrom1To16BytesCase3)
3256
3257 .p2align 4
/* StrncpyExit11Case2OrCase3: copy offsets -2..5 from (%ecx) to (%edx)
   with one 8-byte move (it starts two bytes before both pointers) and
   set %esi to 5, then dispatch on %eax: non-zero goes to
   CopyFrom1To16BytesCase2, zero to CopyFrom1To16BytesCase3.  %eax is
   presumably a NUL-byte mask from the caller -- confirm.  */
3258L(StrncpyExit11Case2OrCase3):
3259 movlpd -2(%ecx), %xmm0
3260 movlpd %xmm0, -2(%edx)
3261 mov $5, %esi
3262 test %eax, %eax
3263 jnz L(CopyFrom1To16BytesCase2)
3264 jmp L(CopyFrom1To16BytesCase3)
3265
3266 .p2align 4
/* StrncpyExit12Case2OrCase3: copy offsets 0..3 from (%ecx) to (%edx)
   with one dword move through %esi, set %esi to 4, then dispatch on
   %eax: non-zero goes to CopyFrom1To16BytesCase2, zero to
   CopyFrom1To16BytesCase3.  %eax is presumably a NUL-byte mask from the
   caller -- confirm.  */
3267L(StrncpyExit12Case2OrCase3):
3268 movl (%ecx), %esi
3269 movl %esi, (%edx)
3270 mov $4, %esi
3271 test %eax, %eax
3272 jnz L(CopyFrom1To16BytesCase2)
3273 jmp L(CopyFrom1To16BytesCase3)
3274
3275 .p2align 4
/* StrncpyExit13Case2OrCase3: copy offsets -1..2 from (%ecx) to (%edx)
   with one dword move through %esi (it starts one byte before both
   pointers), set %esi to 3, then dispatch on %eax: non-zero goes to
   CopyFrom1To16BytesCase2, zero to CopyFrom1To16BytesCase3.  %eax is
   presumably a NUL-byte mask from the caller -- confirm.  */
3276L(StrncpyExit13Case2OrCase3):
3277 movl -1(%ecx), %esi
3278 movl %esi, -1(%edx)
3279 mov $3, %esi
3280 test %eax, %eax
3281 jnz L(CopyFrom1To16BytesCase2)
3282 jmp L(CopyFrom1To16BytesCase3)
3283
3284 .p2align 4
/* StrncpyExit14Case2OrCase3: copy offsets -2..1 from (%ecx) to (%edx)
   with one dword move through %esi (it starts two bytes before both
   pointers), set %esi to 2, then dispatch on %eax: non-zero goes to
   CopyFrom1To16BytesCase2, zero to CopyFrom1To16BytesCase3.  %eax is
   presumably a NUL-byte mask from the caller -- confirm.  */
3285L(StrncpyExit14Case2OrCase3):
3286 movl -2(%ecx), %esi
3287 movl %esi, -2(%edx)
3288 mov $2, %esi
3289 test %eax, %eax
3290 jnz L(CopyFrom1To16BytesCase2)
3291 jmp L(CopyFrom1To16BytesCase3)
3292
3293 .p2align 4
/* StrncpyExit15Case2OrCase3: copy offsets -3..0 from (%ecx) to (%edx)
   with one dword move through %esi (it starts three bytes before both
   pointers), set %esi to 1, then dispatch on %eax: non-zero goes to
   CopyFrom1To16BytesCase2, zero to CopyFrom1To16BytesCase3.  %eax is
   presumably a NUL-byte mask from the caller -- confirm.  */
3294L(StrncpyExit15Case2OrCase3):
3295 movl -3(%ecx), %esi
3296 movl %esi, -3(%edx)
3297 mov $1, %esi
3298 test %eax, %eax
3299 jnz L(CopyFrom1To16BytesCase2)
3300 jmp L(CopyFrom1To16BytesCase3)
3301
/* StrncpyLeave1: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit1.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $1; the reload uses movaps,
   so %ecx + 31 must be 16-byte aligned.  %xmm3 keeps the original
   %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5 are
   stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3302L(StrncpyLeave1):
3303 movaps %xmm2, %xmm3
3304 add $48, %ebx
3305 jle L(StrncpyExit1)
3306 palignr $1, %xmm1, %xmm2
3307 movaps %xmm2, (%edx)
3308 movaps 31(%ecx), %xmm2
3309 lea 16(%esi), %esi
3310 sub $16, %ebx
3311 jbe L(StrncpyExit1)
3312 palignr $1, %xmm3, %xmm2
3313 movaps %xmm2, 16(%edx)
3314 lea 16(%esi), %esi
3315 sub $16, %ebx
3316 jbe L(StrncpyExit1)
3317 movaps %xmm4, 32(%edx)
3318 lea 16(%esi), %esi
3319 sub $16, %ebx
3320 jbe L(StrncpyExit1)
3321 movaps %xmm5, 48(%edx)
3322 lea 16(%esi), %esi
3323 lea -16(%ebx), %ebx
/* StrncpyExit1: advance both pointers by %esi + 15, copy the 16 bytes
   that end at the new positions (unaligned), clear %esi and continue in
   CopyFrom1To16BytesCase3.  */
3324L(StrncpyExit1):
3325 lea 15(%edx, %esi), %edx
3326 lea 15(%ecx, %esi), %ecx
3327 movdqu -16(%ecx), %xmm0
3328 xor %esi, %esi
3329 movdqu %xmm0, -16(%edx)
3330 jmp L(CopyFrom1To16BytesCase3)
3331
/* StrncpyLeave2: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit2.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $2; the reload uses movaps,
   so %ecx + 30 must be 16-byte aligned.  %xmm3 keeps the original
   %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5 are
   stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3332L(StrncpyLeave2):
3333 movaps %xmm2, %xmm3
3334 add $48, %ebx
3335 jle L(StrncpyExit2)
3336 palignr $2, %xmm1, %xmm2
3337 movaps %xmm2, (%edx)
3338 movaps 30(%ecx), %xmm2
3339 lea 16(%esi), %esi
3340 sub $16, %ebx
3341 jbe L(StrncpyExit2)
3342 palignr $2, %xmm3, %xmm2
3343 movaps %xmm2, 16(%edx)
3344 lea 16(%esi), %esi
3345 sub $16, %ebx
3346 jbe L(StrncpyExit2)
3347 movaps %xmm4, 32(%edx)
3348 lea 16(%esi), %esi
3349 sub $16, %ebx
3350 jbe L(StrncpyExit2)
3351 movaps %xmm5, 48(%edx)
3352 lea 16(%esi), %esi
3353 lea -16(%ebx), %ebx
/* StrncpyExit2: advance both pointers by %esi + 14, copy the 16 bytes
   that end at the new positions (unaligned), clear %esi and continue in
   CopyFrom1To16BytesCase3.  */
3354L(StrncpyExit2):
3355 lea 14(%edx, %esi), %edx
3356 lea 14(%ecx, %esi), %ecx
3357 movdqu -16(%ecx), %xmm0
3358 xor %esi, %esi
3359 movdqu %xmm0, -16(%edx)
3360 jmp L(CopyFrom1To16BytesCase3)
3361
/* StrncpyLeave3: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit3.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $3; the reload uses movaps,
   so %ecx + 29 must be 16-byte aligned.  %xmm3 keeps the original
   %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5 are
   stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3362L(StrncpyLeave3):
3363 movaps %xmm2, %xmm3
3364 add $48, %ebx
3365 jle L(StrncpyExit3)
3366 palignr $3, %xmm1, %xmm2
3367 movaps %xmm2, (%edx)
3368 movaps 29(%ecx), %xmm2
3369 lea 16(%esi), %esi
3370 sub $16, %ebx
3371 jbe L(StrncpyExit3)
3372 palignr $3, %xmm3, %xmm2
3373 movaps %xmm2, 16(%edx)
3374 lea 16(%esi), %esi
3375 sub $16, %ebx
3376 jbe L(StrncpyExit3)
3377 movaps %xmm4, 32(%edx)
3378 lea 16(%esi), %esi
3379 sub $16, %ebx
3380 jbe L(StrncpyExit3)
3381 movaps %xmm5, 48(%edx)
3382 lea 16(%esi), %esi
3383 lea -16(%ebx), %ebx
/* StrncpyExit3: advance both pointers by %esi + 13, copy the 16 bytes
   that end at the new positions (unaligned), clear %esi and continue in
   CopyFrom1To16BytesCase3.  */
3384L(StrncpyExit3):
3385 lea 13(%edx, %esi), %edx
3386 lea 13(%ecx, %esi), %ecx
3387 movdqu -16(%ecx), %xmm0
3388 xor %esi, %esi
3389 movdqu %xmm0, -16(%edx)
3390 jmp L(CopyFrom1To16BytesCase3)
3391
/* StrncpyLeave4: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit4.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $4; the reload uses movaps,
   so %ecx + 28 must be 16-byte aligned.  %xmm3 keeps the original
   %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5 are
   stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3392L(StrncpyLeave4):
3393 movaps %xmm2, %xmm3
3394 add $48, %ebx
3395 jle L(StrncpyExit4)
3396 palignr $4, %xmm1, %xmm2
3397 movaps %xmm2, (%edx)
3398 movaps 28(%ecx), %xmm2
3399 lea 16(%esi), %esi
3400 sub $16, %ebx
3401 jbe L(StrncpyExit4)
3402 palignr $4, %xmm3, %xmm2
3403 movaps %xmm2, 16(%edx)
3404 lea 16(%esi), %esi
3405 sub $16, %ebx
3406 jbe L(StrncpyExit4)
3407 movaps %xmm4, 32(%edx)
3408 lea 16(%esi), %esi
3409 sub $16, %ebx
3410 jbe L(StrncpyExit4)
3411 movaps %xmm5, 48(%edx)
3412 lea 16(%esi), %esi
3413 lea -16(%ebx), %ebx
/* StrncpyExit4: advance both pointers by %esi + 12, copy the 12 bytes
   that end at the new positions (8 + 4, %eax is scratch), clear %esi
   and continue in CopyFrom1To16BytesCase3.  */
3414L(StrncpyExit4):
3415 lea 12(%edx, %esi), %edx
3416 lea 12(%ecx, %esi), %ecx
3417 movlpd -12(%ecx), %xmm0
3418 movl -4(%ecx), %eax
3419 movlpd %xmm0, -12(%edx)
3420 movl %eax, -4(%edx)
3421 xor %esi, %esi
3422 jmp L(CopyFrom1To16BytesCase3)
3423
/* StrncpyLeave5: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit5.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $5; the reload uses movaps,
   so %ecx + 27 must be 16-byte aligned.  %xmm3 keeps the original
   %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5 are
   stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3424L(StrncpyLeave5):
3425 movaps %xmm2, %xmm3
3426 add $48, %ebx
3427 jle L(StrncpyExit5)
3428 palignr $5, %xmm1, %xmm2
3429 movaps %xmm2, (%edx)
3430 movaps 27(%ecx), %xmm2
3431 lea 16(%esi), %esi
3432 sub $16, %ebx
3433 jbe L(StrncpyExit5)
3434 palignr $5, %xmm3, %xmm2
3435 movaps %xmm2, 16(%edx)
3436 lea 16(%esi), %esi
3437 sub $16, %ebx
3438 jbe L(StrncpyExit5)
3439 movaps %xmm4, 32(%edx)
3440 lea 16(%esi), %esi
3441 sub $16, %ebx
3442 jbe L(StrncpyExit5)
3443 movaps %xmm5, 48(%edx)
3444 lea 16(%esi), %esi
3445 lea -16(%ebx), %ebx
/* StrncpyExit5: advance both pointers by %esi + 11, copy the 11 bytes
   that end at the new positions (an 8-byte and a 4-byte move that
   overlap by one byte, %eax is scratch), clear %esi and continue in
   CopyFrom1To16BytesCase3.  */
3446L(StrncpyExit5):
3447 lea 11(%edx, %esi), %edx
3448 lea 11(%ecx, %esi), %ecx
3449 movlpd -11(%ecx), %xmm0
3450 movl -4(%ecx), %eax
3451 movlpd %xmm0, -11(%edx)
3452 movl %eax, -4(%edx)
3453 xor %esi, %esi
3454 jmp L(CopyFrom1To16BytesCase3)
3455
/* StrncpyLeave6: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit6.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $6; the reload uses movaps,
   so %ecx + 26 must be 16-byte aligned.  %xmm3 keeps the original
   %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5 are
   stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3456L(StrncpyLeave6):
3457 movaps %xmm2, %xmm3
3458 add $48, %ebx
3459 jle L(StrncpyExit6)
3460 palignr $6, %xmm1, %xmm2
3461 movaps %xmm2, (%edx)
3462 movaps 26(%ecx), %xmm2
3463 lea 16(%esi), %esi
3464 sub $16, %ebx
3465 jbe L(StrncpyExit6)
3466 palignr $6, %xmm3, %xmm2
3467 movaps %xmm2, 16(%edx)
3468 lea 16(%esi), %esi
3469 sub $16, %ebx
3470 jbe L(StrncpyExit6)
3471 movaps %xmm4, 32(%edx)
3472 lea 16(%esi), %esi
3473 sub $16, %ebx
3474 jbe L(StrncpyExit6)
3475 movaps %xmm5, 48(%edx)
3476 lea 16(%esi), %esi
3477 lea -16(%ebx), %ebx
/* StrncpyExit6: advance both pointers by %esi + 10, copy the 10 bytes
   that end at the new positions (8 + 2, %ax is scratch), clear %esi and
   continue in CopyFrom1To16BytesCase3.  */
3478L(StrncpyExit6):
3479 lea 10(%edx, %esi), %edx
3480 lea 10(%ecx, %esi), %ecx
3481
3482 movlpd -10(%ecx), %xmm0
3483 movw -2(%ecx), %ax
3484 movlpd %xmm0, -10(%edx)
3485 movw %ax, -2(%edx)
3486 xor %esi, %esi
3487 jmp L(CopyFrom1To16BytesCase3)
3488
/* StrncpyLeave7: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit7.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $7; the reload uses movaps,
   so %ecx + 25 must be 16-byte aligned.  %xmm3 keeps the original
   %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5 are
   stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3489L(StrncpyLeave7):
3490 movaps %xmm2, %xmm3
3491 add $48, %ebx
3492 jle L(StrncpyExit7)
3493 palignr $7, %xmm1, %xmm2
3494 movaps %xmm2, (%edx)
3495 movaps 25(%ecx), %xmm2
3496 lea 16(%esi), %esi
3497 sub $16, %ebx
3498 jbe L(StrncpyExit7)
3499 palignr $7, %xmm3, %xmm2
3500 movaps %xmm2, 16(%edx)
3501 lea 16(%esi), %esi
3502 sub $16, %ebx
3503 jbe L(StrncpyExit7)
3504 movaps %xmm4, 32(%edx)
3505 lea 16(%esi), %esi
3506 sub $16, %ebx
3507 jbe L(StrncpyExit7)
3508 movaps %xmm5, 48(%edx)
3509 lea 16(%esi), %esi
3510 lea -16(%ebx), %ebx
/* StrncpyExit7: advance both pointers by %esi + 9, copy the 9 bytes
   that end at the new positions (8 + 1, %ah is scratch), clear %esi and
   continue in CopyFrom1To16BytesCase3.  */
3511L(StrncpyExit7):
3512 lea 9(%edx, %esi), %edx
3513 lea 9(%ecx, %esi), %ecx
3514
3515 movlpd -9(%ecx), %xmm0
3516 movb -1(%ecx), %ah
3517 movlpd %xmm0, -9(%edx)
3518 movb %ah, -1(%edx)
3519 xor %esi, %esi
3520 jmp L(CopyFrom1To16BytesCase3)
3521
/* StrncpyLeave8: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit8.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $8; the reload uses movaps,
   so %ecx + 24 must be 16-byte aligned.  %xmm3 keeps the original
   %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5 are
   stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3522L(StrncpyLeave8):
3523 movaps %xmm2, %xmm3
3524 add $48, %ebx
3525 jle L(StrncpyExit8)
3526 palignr $8, %xmm1, %xmm2
3527 movaps %xmm2, (%edx)
3528 movaps 24(%ecx), %xmm2
3529 lea 16(%esi), %esi
3530 sub $16, %ebx
3531 jbe L(StrncpyExit8)
3532 palignr $8, %xmm3, %xmm2
3533 movaps %xmm2, 16(%edx)
3534 lea 16(%esi), %esi
3535 sub $16, %ebx
3536 jbe L(StrncpyExit8)
3537 movaps %xmm4, 32(%edx)
3538 lea 16(%esi), %esi
3539 sub $16, %ebx
3540 jbe L(StrncpyExit8)
3541 movaps %xmm5, 48(%edx)
3542 lea 16(%esi), %esi
3543 lea -16(%ebx), %ebx
/* StrncpyExit8: advance both pointers by %esi + 8, copy the 8 bytes
   that end at the new positions, clear %esi and continue in
   CopyFrom1To16BytesCase3.  */
3544L(StrncpyExit8):
3545 lea 8(%edx, %esi), %edx
3546 lea 8(%ecx, %esi), %ecx
3547 movlpd -8(%ecx), %xmm0
3548 movlpd %xmm0, -8(%edx)
3549 xor %esi, %esi
3550 jmp L(CopyFrom1To16BytesCase3)
3551
/* StrncpyLeave9: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit9.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $9; the reload uses movaps,
   so %ecx + 23 must be 16-byte aligned.  %xmm3 keeps the original
   %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5 are
   stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3552L(StrncpyLeave9):
3553 movaps %xmm2, %xmm3
3554 add $48, %ebx
3555 jle L(StrncpyExit9)
3556 palignr $9, %xmm1, %xmm2
3557 movaps %xmm2, (%edx)
3558 movaps 23(%ecx), %xmm2
3559 lea 16(%esi), %esi
3560 sub $16, %ebx
3561 jbe L(StrncpyExit9)
3562 palignr $9, %xmm3, %xmm2
3563 movaps %xmm2, 16(%edx)
3564 lea 16(%esi), %esi
3565 sub $16, %ebx
3566 jbe L(StrncpyExit9)
3567 movaps %xmm4, 32(%edx)
3568 lea 16(%esi), %esi
3569 sub $16, %ebx
3570 jbe L(StrncpyExit9)
3571 movaps %xmm5, 48(%edx)
3572 lea 16(%esi), %esi
3573 lea -16(%ebx), %ebx
/* StrncpyExit9: advance both pointers by %esi + 7, copy the 8 bytes
   that end at the new positions (one byte more than the advance), clear
   %esi and continue in CopyFrom1To16BytesCase3.  */
3574L(StrncpyExit9):
3575 lea 7(%edx, %esi), %edx
3576 lea 7(%ecx, %esi), %ecx
3577
3578 movlpd -8(%ecx), %xmm0
3579 movlpd %xmm0, -8(%edx)
3580 xor %esi, %esi
3581 jmp L(CopyFrom1To16BytesCase3)
3582
/* StrncpyLeave10: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit10.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $10; the reload uses
   movaps, so %ecx + 22 must be 16-byte aligned.  %xmm3 keeps the
   original %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5
   are stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3583L(StrncpyLeave10):
3584 movaps %xmm2, %xmm3
3585 add $48, %ebx
3586 jle L(StrncpyExit10)
3587 palignr $10, %xmm1, %xmm2
3588 movaps %xmm2, (%edx)
3589 movaps 22(%ecx), %xmm2
3590 lea 16(%esi), %esi
3591 sub $16, %ebx
3592 jbe L(StrncpyExit10)
3593 palignr $10, %xmm3, %xmm2
3594 movaps %xmm2, 16(%edx)
3595 lea 16(%esi), %esi
3596 sub $16, %ebx
3597 jbe L(StrncpyExit10)
3598 movaps %xmm4, 32(%edx)
3599 lea 16(%esi), %esi
3600 sub $16, %ebx
3601 jbe L(StrncpyExit10)
3602 movaps %xmm5, 48(%edx)
3603 lea 16(%esi), %esi
3604 lea -16(%ebx), %ebx
/* StrncpyExit10: advance both pointers by %esi + 6, copy the 8 bytes
   that end at the new positions (two bytes more than the advance),
   clear %esi and continue in CopyFrom1To16BytesCase3.  */
3605L(StrncpyExit10):
3606 lea 6(%edx, %esi), %edx
3607 lea 6(%ecx, %esi), %ecx
3608
3609 movlpd -8(%ecx), %xmm0
3610 movlpd %xmm0, -8(%edx)
3611 xor %esi, %esi
3612 jmp L(CopyFrom1To16BytesCase3)
3613
/* StrncpyLeave11: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit11.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $11; the reload uses
   movaps, so %ecx + 21 must be 16-byte aligned.  %xmm3 keeps the
   original %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5
   are stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3614L(StrncpyLeave11):
3615 movaps %xmm2, %xmm3
3616 add $48, %ebx
3617 jle L(StrncpyExit11)
3618 palignr $11, %xmm1, %xmm2
3619 movaps %xmm2, (%edx)
3620 movaps 21(%ecx), %xmm2
3621 lea 16(%esi), %esi
3622 sub $16, %ebx
3623 jbe L(StrncpyExit11)
3624 palignr $11, %xmm3, %xmm2
3625 movaps %xmm2, 16(%edx)
3626 lea 16(%esi), %esi
3627 sub $16, %ebx
3628 jbe L(StrncpyExit11)
3629 movaps %xmm4, 32(%edx)
3630 lea 16(%esi), %esi
3631 sub $16, %ebx
3632 jbe L(StrncpyExit11)
3633 movaps %xmm5, 48(%edx)
3634 lea 16(%esi), %esi
3635 lea -16(%ebx), %ebx
/* StrncpyExit11: advance both pointers by %esi + 5, copy the 5 bytes
   that end at the new positions (4 + 1; %esi and %ah are scratch, which
   is safe because %esi is cleared right after), then continue in
   CopyFrom1To16BytesCase3.  */
3636L(StrncpyExit11):
3637 lea 5(%edx, %esi), %edx
3638 lea 5(%ecx, %esi), %ecx
3639 movl -5(%ecx), %esi
3640 movb -1(%ecx), %ah
3641 movl %esi, -5(%edx)
3642 movb %ah, -1(%edx)
3643 xor %esi, %esi
3644 jmp L(CopyFrom1To16BytesCase3)
3645
/* StrncpyLeave12: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit12.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $12; the reload uses
   movaps, so %ecx + 20 must be 16-byte aligned.  %xmm3 keeps the
   original %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5
   are stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3646L(StrncpyLeave12):
3647 movaps %xmm2, %xmm3
3648 add $48, %ebx
3649 jle L(StrncpyExit12)
3650 palignr $12, %xmm1, %xmm2
3651 movaps %xmm2, (%edx)
3652 movaps 20(%ecx), %xmm2
3653 lea 16(%esi), %esi
3654 sub $16, %ebx
3655 jbe L(StrncpyExit12)
3656 palignr $12, %xmm3, %xmm2
3657 movaps %xmm2, 16(%edx)
3658 lea 16(%esi), %esi
3659 sub $16, %ebx
3660 jbe L(StrncpyExit12)
3661 movaps %xmm4, 32(%edx)
3662 lea 16(%esi), %esi
3663 sub $16, %ebx
3664 jbe L(StrncpyExit12)
3665 movaps %xmm5, 48(%edx)
3666 lea 16(%esi), %esi
3667 lea -16(%ebx), %ebx
/* StrncpyExit12: advance both pointers by %esi + 4, copy the 4 bytes
   that end at the new positions (%eax is scratch), clear %esi and
   continue in CopyFrom1To16BytesCase3.  */
3668L(StrncpyExit12):
3669 lea 4(%edx, %esi), %edx
3670 lea 4(%ecx, %esi), %ecx
3671 movl -4(%ecx), %eax
3672 movl %eax, -4(%edx)
3673 xor %esi, %esi
3674 jmp L(CopyFrom1To16BytesCase3)
3675
/* StrncpyLeave13: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit13.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $13; the reload uses
   movaps, so %ecx + 19 must be 16-byte aligned.  %xmm3 keeps the
   original %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5
   are stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3676L(StrncpyLeave13):
3677 movaps %xmm2, %xmm3
3678 add $48, %ebx
3679 jle L(StrncpyExit13)
3680 palignr $13, %xmm1, %xmm2
3681 movaps %xmm2, (%edx)
3682 movaps 19(%ecx), %xmm2
3683 lea 16(%esi), %esi
3684 sub $16, %ebx
3685 jbe L(StrncpyExit13)
3686 palignr $13, %xmm3, %xmm2
3687 movaps %xmm2, 16(%edx)
3688 lea 16(%esi), %esi
3689 sub $16, %ebx
3690 jbe L(StrncpyExit13)
3691 movaps %xmm4, 32(%edx)
3692 lea 16(%esi), %esi
3693 sub $16, %ebx
3694 jbe L(StrncpyExit13)
3695 movaps %xmm5, 48(%edx)
3696 lea 16(%esi), %esi
3697 lea -16(%ebx), %ebx
/* StrncpyExit13: advance both pointers by %esi + 3, copy the 4 bytes
   that end at the new positions (one byte more than the advance, %eax
   is scratch), clear %esi and continue in CopyFrom1To16BytesCase3.  */
3698L(StrncpyExit13):
3699 lea 3(%edx, %esi), %edx
3700 lea 3(%ecx, %esi), %ecx
3701
3702 movl -4(%ecx), %eax
3703 movl %eax, -4(%edx)
3704 xor %esi, %esi
3705 jmp L(CopyFrom1To16BytesCase3)
3706
/* StrncpyLeave14: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit14.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $14; the reload uses
   movaps, so %ecx + 18 must be 16-byte aligned.  %xmm3 keeps the
   original %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5
   are stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3707L(StrncpyLeave14):
3708 movaps %xmm2, %xmm3
3709 add $48, %ebx
3710 jle L(StrncpyExit14)
3711 palignr $14, %xmm1, %xmm2
3712 movaps %xmm2, (%edx)
3713 movaps 18(%ecx), %xmm2
3714 lea 16(%esi), %esi
3715 sub $16, %ebx
3716 jbe L(StrncpyExit14)
3717 palignr $14, %xmm3, %xmm2
3718 movaps %xmm2, 16(%edx)
3719 lea 16(%esi), %esi
3720 sub $16, %ebx
3721 jbe L(StrncpyExit14)
3722 movaps %xmm4, 32(%edx)
3723 lea 16(%esi), %esi
3724 sub $16, %ebx
3725 jbe L(StrncpyExit14)
3726 movaps %xmm5, 48(%edx)
3727 lea 16(%esi), %esi
3728 lea -16(%ebx), %ebx
/* StrncpyExit14: advance both pointers by %esi + 2, copy the 2 bytes
   that end at the new positions (%ax is scratch), clear %esi and
   continue in CopyFrom1To16BytesCase3.  */
3729L(StrncpyExit14):
3730 lea 2(%edx, %esi), %edx
3731 lea 2(%ecx, %esi), %ecx
3732 movw -2(%ecx), %ax
3733 movw %ax, -2(%edx)
3734 xor %esi, %esi
3735 jmp L(CopyFrom1To16BytesCase3)
3736
/* StrncpyLeave15: store up to four 16-byte chunks at 0, 16, 32 and
   48(%edx), then fall through to StrncpyExit15.  %ebx is biased by +48
   and reduced by 16 per chunk; once it is used up the remaining stores
   are skipped.  %esi grows by 16 for every chunk stored.
   Chunks 0 and 1 are assembled with palignr $15; the reload uses
   movaps, so %ecx + 17 must be 16-byte aligned.  %xmm3 keeps the
   original %xmm2, which the first palignr overwrites.  %xmm4 and %xmm5
   are stored unmodified (presumably already realigned by the code that
   jumps here -- confirm).  */
3737L(StrncpyLeave15):
3738 movaps %xmm2, %xmm3
3739 add $48, %ebx
3740 jle L(StrncpyExit15)
3741 palignr $15, %xmm1, %xmm2
3742 movaps %xmm2, (%edx)
3743 movaps 17(%ecx), %xmm2
3744 lea 16(%esi), %esi
3745 sub $16, %ebx
3746 jbe L(StrncpyExit15)
3747 palignr $15, %xmm3, %xmm2
3748 movaps %xmm2, 16(%edx)
3749 lea 16(%esi), %esi
3750 sub $16, %ebx
3751 jbe L(StrncpyExit15)
3752 movaps %xmm4, 32(%edx)
3753 lea 16(%esi), %esi
3754 sub $16, %ebx
3755 jbe L(StrncpyExit15)
3756 movaps %xmm5, 48(%edx)
3757 lea 16(%esi), %esi
3758 lea -16(%ebx), %ebx
/* StrncpyExit15: advance both pointers by %esi + 1, copy the single
   byte that ends at the new positions (%ah is scratch), clear %esi and
   continue in CopyFrom1To16BytesCase3.  */
3759L(StrncpyExit15):
3760 lea 1(%edx, %esi), %edx
3761 lea 1(%ecx, %esi), %ecx
3762 movb -1(%ecx), %ah
3763 movb %ah, -1(%edx)
3764 xor %esi, %esi
3765 jmp L(CopyFrom1To16BytesCase3)
3766# endif
3767
3768# ifndef USE_AS_STRCAT
3769# ifdef USE_AS_STRNCPY
/* Unwind annotations only: CFI_POP expands to cfi_adjust_cfa_offset (-4)
   and cfi_restore (REG), so no pop instruction is emitted here.  The
   code that follows is described as running without %esi and %edi
   saved on the stack.  */
3770 CFI_POP (%esi)
3771 CFI_POP (%edi)
3772
3773 .p2align 4
/* ExitTail0: nothing to copy; return %edx in %eax.  */
3774L(ExitTail0):
3775 movl %edx, %eax
3776 RETURN
3777
3778 .p2align 4
/* StrncpyExit15Bytes: byte-by-byte dispatch on the count in %ebx and on
   the source bytes at offsets 8..13 of (%ecx).
   - %ebx <= 12 is handed to StrncpyExit12Bytes.
   - A NUL at offset 8..11 jumps to ExitTail9..ExitTail12.
   - %ebx == 13 or a NUL at offset 12 jumps to ExitTail13.
   - %ebx == 14 or a NUL at offset 13 jumps to ExitTail14.
   - Otherwise 15 bytes are copied here and the function returns.
   Offsets 0..7 are not examined here (presumably already known to be
   non-NUL, with %ebx in 9..15 -- confirm against the jump sites).  */
3779L(StrncpyExit15Bytes):
3780 cmp $12, %ebx
3781 jbe L(StrncpyExit12Bytes)
3782 cmpb $0, 8(%ecx)
3783 jz L(ExitTail9)
3784 cmpb $0, 9(%ecx)
3785 jz L(ExitTail10)
3786 cmpb $0, 10(%ecx)
3787 jz L(ExitTail11)
3788 cmpb $0, 11(%ecx)
3789 jz L(ExitTail12)
3790 cmp $13, %ebx
3791 je L(ExitTail13)
3792 cmpb $0, 12(%ecx)
3793 jz L(ExitTail13)
3794 cmp $14, %ebx
3795 je L(ExitTail14)
3796 cmpb $0, 13(%ecx)
3797 jz L(ExitTail14)
/* Copy offsets 0..14 with two 8-byte moves that overlap by one byte.  */
3798 movlpd (%ecx), %xmm0
3799 movlpd 7(%ecx), %xmm1
3800 movlpd %xmm0, (%edx)
3801 movlpd %xmm1, 7(%edx)
3802# ifdef USE_AS_STPCPY
/* Return %edx + 14, stepped forward by one unless that byte is NUL.  */
3803 lea 14(%edx), %eax
3804 cmpb $1, (%eax)
3805 sbb $-1, %eax
3806# else
3807 movl %edx, %eax
3808# endif
3809 RETURN
3810
3811 .p2align 4
/* StrncpyExit12Bytes: byte-by-byte dispatch on the count in %ebx and on
   the source bytes at offsets 8..10 of (%ecx).
   - %ebx == 9 or a NUL at offset 8 jumps to ExitTail9.
   - %ebx == 10 or a NUL at offset 9 jumps to ExitTail10.
   - %ebx == 11 or a NUL at offset 10 jumps to ExitTail11.
   - Otherwise 12 bytes are copied here and the function returns.
   Offsets 0..7 are not examined here (presumably already known to be
   non-NUL, with %ebx in 9..12 -- confirm against the jump sites).  */
3812L(StrncpyExit12Bytes):
3813 cmp $9, %ebx
3814 je L(ExitTail9)
3815 cmpb $0, 8(%ecx)
3816 jz L(ExitTail9)
3817 cmp $10, %ebx
3818 je L(ExitTail10)
3819 cmpb $0, 9(%ecx)
3820 jz L(ExitTail10)
3821 cmp $11, %ebx
3822 je L(ExitTail11)
3823 cmpb $0, 10(%ecx)
3824 jz L(ExitTail11)
/* Copy offsets 0..11 as one 8-byte move plus one dword.  */
3825 movlpd (%ecx), %xmm0
3826 movl 8(%ecx), %eax
3827 movlpd %xmm0, (%edx)
3828 movl %eax, 8(%edx)
3829 SAVE_RESULT_TAIL (11)
3830# ifdef USE_AS_STPCPY
/* %eax = %edx + 11 (from SAVE_RESULT_TAIL); step it forward by one
   unless the byte it points at is NUL.  */
3831 cmpb $1, (%eax)
3832 sbb $-1, %eax
3833# endif
3834 RETURN
3835
3836 .p2align 4
/* StrncpyExit8Bytes: byte-by-byte dispatch on the count in %ebx and on
   the source bytes at offsets 0..6 of (%ecx).
   - %ebx <= 4 is handed to StrncpyExit4Bytes.
   - A NUL at offset 0..3 jumps to ExitTail1..ExitTail4.
   - %ebx == 5, 6 or 7, or a NUL at offset 4, 5 or 6, jumps to
     ExitTail5, ExitTail6 or ExitTail7 respectively.
   - Otherwise 8 bytes are copied here and the function returns.
   %ebx is presumably at most 8 on entry -- confirm against the jump
   sites.  */
3837L(StrncpyExit8Bytes):
3838 cmp $4, %ebx
3839 jbe L(StrncpyExit4Bytes)
3840 cmpb $0, (%ecx)
3841 jz L(ExitTail1)
3842 cmpb $0, 1(%ecx)
3843 jz L(ExitTail2)
3844 cmpb $0, 2(%ecx)
3845 jz L(ExitTail3)
3846 cmpb $0, 3(%ecx)
3847 jz L(ExitTail4)
3848
3849 cmp $5, %ebx
3850 je L(ExitTail5)
3851 cmpb $0, 4(%ecx)
3852 jz L(ExitTail5)
3853 cmp $6, %ebx
3854 je L(ExitTail6)
3855 cmpb $0, 5(%ecx)
3856 jz L(ExitTail6)
3857 cmp $7, %ebx
3858 je L(ExitTail7)
3859 cmpb $0, 6(%ecx)
3860 jz L(ExitTail7)
/* Copy offsets 0..7 with one 8-byte move.  */
3861 movlpd (%ecx), %xmm0
3862 movlpd %xmm0, (%edx)
3863# ifdef USE_AS_STPCPY
/* Return %edx + 7, stepped forward by one unless that byte is NUL.  */
3864 lea 7(%edx), %eax
3865 cmpb $1, (%eax)
3866 sbb $-1, %eax
3867# else
3868 movl %edx, %eax
3869# endif
3870 RETURN
3871
3872 .p2align 4
/* StrncpyExit4Bytes: byte-by-byte dispatch on the count in %ebx and on
   the source bytes at offsets 0..2 of (%ecx).
   - %ebx == 0 jumps to ExitTail0 (nothing is copied).
   - %ebx == 1, 2 or 3, or a NUL at offset 0, 1 or 2, jumps to
     ExitTail1, ExitTail2 or ExitTail3 respectively.
   - Otherwise 4 bytes are copied here and the function returns.
   Each source byte is read only after the count check before it has
   failed to match.  %ebx is presumably at most 4 on entry -- confirm
   against the jump sites.  */
3873L(StrncpyExit4Bytes):
3874 test %ebx, %ebx
3875 jz L(ExitTail0)
3876 cmp $1, %ebx
3877 je L(ExitTail1)
3878 cmpb $0, (%ecx)
3879 jz L(ExitTail1)
3880 cmp $2, %ebx
3881 je L(ExitTail2)
3882 cmpb $0, 1(%ecx)
3883 jz L(ExitTail2)
3884 cmp $3, %ebx
3885 je L(ExitTail3)
3886 cmpb $0, 2(%ecx)
3887 jz L(ExitTail3)
/* Copy offsets 0..3 with one dword move.  */
3888 movl (%ecx), %eax
3889 movl %eax, (%edx)
3890 SAVE_RESULT_TAIL (3)
3891# ifdef USE_AS_STPCPY
/* %eax = %edx + 3 (from SAVE_RESULT_TAIL); step it forward by one
   unless the byte it points at is NUL.  */
3892 cmpb $1, (%eax)
3893 sbb $-1, %eax
3894# endif
3895 RETURN
3896# endif
3897
3898END (STRCPY)
3899# endif
3900#endif
3901

source code of glibc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S