1 | /* strcat with SSSE3 |
2 | Copyright (C) 2011-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #if IS_IN (libc) /* Everything down to the final #endif is assembled only when IS_IN (libc) holds. */ |
20 | |
21 | # include <sysdep.h> |
22 | /* Entry-point name: __strcat_ssse3 unless STRCAT is already defined. */ |
23 | # ifndef STRCAT |
24 | # define STRCAT __strcat_ssse3 |
25 | # endif |
26 | /* NOTE(review): USE_AS_STRCAT is not tested in this file; presumably it is consumed by the strcpy-ssse3.S included further down - confirm. */ |
27 | # define USE_AS_STRCAT |
28 | |
29 | .text |
30 | ENTRY (STRCAT) |
31 | # ifdef USE_AS_STRNCAT |
32 | mov %rdx, %r8 /* Keep the third argument (the strncat count) in r8; rdx is reused as scratch below. */ |
33 | # endif |
34 | /* In: rdi = destination string, rsi = source string. Every return path of this routine sets rax = rdi. */ |
35 | /* Step 1: compute the length of the destination string in rax. */ |
36 | /* Inline corresponding strlen file, temporary until new strcpy |
37 | implementation gets merged. */ |
38 | /* Probe destination bytes 0-15 one at a time; L(exit_tailN) adds N to the length and starts the copy. */ |
39 | xor %eax, %eax /* Length accumulator starts at 0. */ |
40 | cmpb $0, (%rdi) |
41 | jz L(exit_tail0) |
42 | cmpb $0, 1(%rdi) |
43 | jz L(exit_tail1) |
44 | cmpb $0, 2(%rdi) |
45 | jz L(exit_tail2) |
46 | cmpb $0, 3(%rdi) |
47 | jz L(exit_tail3) |
48 | |
49 | cmpb $0, 4(%rdi) |
50 | jz L(exit_tail4) |
51 | cmpb $0, 5(%rdi) |
52 | jz L(exit_tail5) |
53 | cmpb $0, 6(%rdi) |
54 | jz L(exit_tail6) |
55 | cmpb $0, 7(%rdi) |
56 | jz L(exit_tail7) |
57 | |
58 | cmpb $0, 8(%rdi) |
59 | jz L(exit_tail8) |
60 | cmpb $0, 9(%rdi) |
61 | jz L(exit_tail9) |
62 | cmpb $0, 10(%rdi) |
63 | jz L(exit_tail10) |
64 | cmpb $0, 11(%rdi) |
65 | jz L(exit_tail11) |
66 | |
67 | cmpb $0, 12(%rdi) |
68 | jz L(exit_tail12) |
69 | cmpb $0, 13(%rdi) |
70 | jz L(exit_tail13) |
71 | cmpb $0, 14(%rdi) |
72 | jz L(exit_tail14) |
73 | cmpb $0, 15(%rdi) |
74 | jz L(exit_tail15) |
75 | pxor %xmm0, %xmm0 /* xmm0 = 0, the byte value searched for. */ |
76 | lea 16(%rdi), %rcx /* rcx = rdi + 16; L(exit) subtracts it again to turn rax back into an offset. */ |
77 | lea 16(%rdi), %rax |
78 | and $-16, %rax /* rax = (rdi + 16) rounded down to a 16-byte boundary. */ |
79 | /* 16 unrolled probes of 16 bytes each: pcmpeqb marks NUL bytes, pmovmskb packs the marks into edx. */ |
80 | pcmpeqb (%rax), %xmm0 |
81 | pmovmskb %xmm0, %edx |
82 | pxor %xmm1, %xmm1 /* Zero the comparand used by the next probe. */ |
83 | test %edx, %edx |
84 | lea 16(%rax), %rax /* Advance past the chunk; lea leaves the flags from test intact for jnz. */ |
85 | jnz L(exit) /* At L(exit) rax points just past the chunk that holds the NUL. */ |
86 | |
87 | pcmpeqb (%rax), %xmm1 |
88 | pmovmskb %xmm1, %edx |
89 | pxor %xmm2, %xmm2 |
90 | test %edx, %edx |
91 | lea 16(%rax), %rax |
92 | jnz L(exit) |
93 | |
94 | pcmpeqb (%rax), %xmm2 |
95 | pmovmskb %xmm2, %edx |
96 | pxor %xmm3, %xmm3 |
97 | test %edx, %edx |
98 | lea 16(%rax), %rax |
99 | jnz L(exit) |
100 | |
101 | pcmpeqb (%rax), %xmm3 |
102 | pmovmskb %xmm3, %edx |
103 | test %edx, %edx |
104 | lea 16(%rax), %rax |
105 | jnz L(exit) |
106 | /* From here xmm0-xmm3 are reused without pxor: a probe that found no NUL leaves its register all zero. */ |
107 | pcmpeqb (%rax), %xmm0 |
108 | pmovmskb %xmm0, %edx |
109 | test %edx, %edx |
110 | lea 16(%rax), %rax |
111 | jnz L(exit) |
112 | |
113 | pcmpeqb (%rax), %xmm1 |
114 | pmovmskb %xmm1, %edx |
115 | test %edx, %edx |
116 | lea 16(%rax), %rax |
117 | jnz L(exit) |
118 | |
119 | pcmpeqb (%rax), %xmm2 |
120 | pmovmskb %xmm2, %edx |
121 | test %edx, %edx |
122 | lea 16(%rax), %rax |
123 | jnz L(exit) |
124 | |
125 | pcmpeqb (%rax), %xmm3 |
126 | pmovmskb %xmm3, %edx |
127 | test %edx, %edx |
128 | lea 16(%rax), %rax |
129 | jnz L(exit) |
130 | |
131 | pcmpeqb (%rax), %xmm0 |
132 | pmovmskb %xmm0, %edx |
133 | test %edx, %edx |
134 | lea 16(%rax), %rax |
135 | jnz L(exit) |
136 | |
137 | pcmpeqb (%rax), %xmm1 |
138 | pmovmskb %xmm1, %edx |
139 | test %edx, %edx |
140 | lea 16(%rax), %rax |
141 | jnz L(exit) |
142 | |
143 | pcmpeqb (%rax), %xmm2 |
144 | pmovmskb %xmm2, %edx |
145 | test %edx, %edx |
146 | lea 16(%rax), %rax |
147 | jnz L(exit) |
148 | |
149 | pcmpeqb (%rax), %xmm3 |
150 | pmovmskb %xmm3, %edx |
151 | test %edx, %edx |
152 | lea 16(%rax), %rax |
153 | jnz L(exit) |
154 | |
155 | pcmpeqb (%rax), %xmm0 |
156 | pmovmskb %xmm0, %edx |
157 | test %edx, %edx |
158 | lea 16(%rax), %rax |
159 | jnz L(exit) |
160 | |
161 | pcmpeqb (%rax), %xmm1 |
162 | pmovmskb %xmm1, %edx |
163 | test %edx, %edx |
164 | lea 16(%rax), %rax |
165 | jnz L(exit) |
166 | |
167 | pcmpeqb (%rax), %xmm2 |
168 | pmovmskb %xmm2, %edx |
169 | test %edx, %edx |
170 | lea 16(%rax), %rax |
171 | jnz L(exit) |
172 | |
173 | pcmpeqb (%rax), %xmm3 |
174 | pmovmskb %xmm3, %edx |
175 | test %edx, %edx |
176 | lea 16(%rax), %rax |
177 | jnz L(exit) |
178 | /* No NUL in the 256 bytes probed above: continue with the 64-byte loop. */ |
179 | and $-0x40, %rax /* Round rax down to a 64-byte boundary; bytes scanned a second time as a result are already known to be non-NUL. */ |
180 | |
181 | .p2align 4 |
182 | L(aligned_64): /* Probe 64 bytes at rax per iteration; xmm0-xmm3 are all zero on entry and after every iteration without a match. */ |
183 | pcmpeqb (%rax), %xmm0 |
184 | pcmpeqb 16(%rax), %xmm1 |
185 | pcmpeqb 32(%rax), %xmm2 |
186 | pcmpeqb 48(%rax), %xmm3 |
187 | pmovmskb %xmm0, %edx /* One NUL mask per 16-byte chunk: edx, r11d, r10d, r9d. */ |
188 | pmovmskb %xmm1, %r11d |
189 | pmovmskb %xmm2, %r10d |
190 | pmovmskb %xmm3, %r9d |
191 | or %edx, %r9d |
192 | or %r11d, %r9d |
193 | or %r10d, %r9d /* r9d = OR of all four masks; the ZF of this or drives the jz below. */ |
194 | lea 64(%rax), %rax /* rax now points past the 64-byte block (lea keeps the flags). */ |
195 | jz L(aligned_64) |
196 | /* A NUL was found. Each exit leaves rax just past the 16-byte chunk that holds it and that chunk's mask in edx. */ |
197 | test %edx, %edx |
198 | jnz L(aligned_64_exit_16) |
199 | test %r11d, %r11d |
200 | jnz L(aligned_64_exit_32) |
201 | test %r10d, %r10d |
202 | jnz L(aligned_64_exit_48) |
203 | |
204 | L(aligned_64_exit_64): /* NUL is in the 4th chunk; rax is already correct. */ |
205 | pmovmskb %xmm3, %edx /* Recompute the mask: r9d was overwritten by the ORs above. */ |
206 | jmp L(exit) |
207 | |
208 | L(aligned_64_exit_48): /* NUL is in the 3rd chunk. */ |
209 | lea -16(%rax), %rax |
210 | mov %r10d, %edx |
211 | jmp L(exit) |
212 | |
213 | L(aligned_64_exit_32): /* NUL is in the 2nd chunk. */ |
214 | lea -32(%rax), %rax |
215 | mov %r11d, %edx |
216 | jmp L(exit) |
217 | |
218 | L(aligned_64_exit_16): /* NUL is in the 1st chunk; edx already holds its mask. */ |
219 | lea -48(%rax), %rax |
220 | /* Falls through into L(exit). */ |
221 | L(exit): /* In: rax = end of the 16-byte chunk holding the first NUL, edx = NUL mask of that chunk, rcx = rdi + 16. */ |
222 | sub %rcx, %rax /* rax = offset of that chunk from rdi (full 64-bit value). */ |
223 | test %dl, %dl |
224 | jz L(exit_high) /* No NUL among bytes 0-7 of the chunk. */ |
225 | test $0x01, %dl |
226 | jnz L(exit_tail0) |
227 | /* The lowest set mask bit N selects L(exit_tailN), which adds N to the length in rax. */ |
228 | test $0x02, %dl |
229 | jnz L(exit_tail1) |
230 | |
231 | test $0x04, %dl |
232 | jnz L(exit_tail2) |
233 | |
234 | test $0x08, %dl |
235 | jnz L(exit_tail3) |
236 | |
237 | test $0x10, %dl |
238 | jnz L(exit_tail4) |
239 | |
240 | test $0x20, %dl |
241 | jnz L(exit_tail5) |
242 | |
243 | test $0x40, %dl |
244 | jnz L(exit_tail6) |
245 | add $7, %rax /* dl != 0 with bits 0-6 clear: the NUL is byte 7. */ |
246 | L(exit_tail0): |
247 | jmp L(StartStrcpyPart) |
248 | /* Every length adjustment below uses the full rax: a write to eax would zero bits 32-63 and truncate a length of 4 GiB or more. */ |
249 | .p2align 4 |
250 | L(exit_high): |
251 | add $8, %rax /* The NUL is among bytes 8-15; dh holds their mask bits. */ |
252 | test $0x01, %dh |
253 | jnz L(exit_tail0) |
254 | |
255 | test $0x02, %dh |
256 | jnz L(exit_tail1) |
257 | |
258 | test $0x04, %dh |
259 | jnz L(exit_tail2) |
260 | |
261 | test $0x08, %dh |
262 | jnz L(exit_tail3) |
263 | |
264 | test $0x10, %dh |
265 | jnz L(exit_tail4) |
266 | |
267 | test $0x20, %dh |
268 | jnz L(exit_tail5) |
269 | |
270 | test $0x40, %dh |
271 | jnz L(exit_tail6) |
272 | add $7, %rax /* Bits 8-14 clear: 8 + 7 = byte 15. */ |
273 | jmp L(StartStrcpyPart) |
274 | |
275 | .p2align 4 |
276 | L(exit_tail1): /* L(exit_tailN): length += N, then start the copy. Also reached from the byte-wise probe at entry with rax = 0. */ |
277 | add $1, %rax |
278 | jmp L(StartStrcpyPart) |
279 | |
280 | .p2align 4 |
281 | L(exit_tail2): |
282 | add $2, %rax |
283 | jmp L(StartStrcpyPart) |
284 | |
285 | .p2align 4 |
286 | L(exit_tail3): |
287 | add $3, %rax |
288 | jmp L(StartStrcpyPart) |
289 | |
290 | .p2align 4 |
291 | L(exit_tail4): |
292 | add $4, %rax |
293 | jmp L(StartStrcpyPart) |
294 | |
295 | .p2align 4 |
296 | L(exit_tail5): |
297 | add $5, %rax |
298 | jmp L(StartStrcpyPart) |
299 | |
300 | .p2align 4 |
301 | L(exit_tail6): |
302 | add $6, %rax |
303 | jmp L(StartStrcpyPart) |
304 | |
305 | .p2align 4 |
306 | L(exit_tail7): |
307 | add $7, %rax |
308 | jmp L(StartStrcpyPart) |
309 | |
310 | .p2align 4 |
311 | L(exit_tail8): |
312 | add $8, %rax |
313 | jmp L(StartStrcpyPart) |
314 | |
315 | .p2align 4 |
316 | L(exit_tail9): |
317 | add $9, %rax |
318 | jmp L(StartStrcpyPart) |
319 | |
320 | .p2align 4 |
321 | L(exit_tail10): |
322 | add $10, %rax |
323 | jmp L(StartStrcpyPart) |
324 | |
325 | .p2align 4 |
326 | L(exit_tail11): |
327 | add $11, %rax |
328 | jmp L(StartStrcpyPart) |
329 | |
330 | .p2align 4 |
331 | L(exit_tail12): |
332 | add $12, %rax |
333 | jmp L(StartStrcpyPart) |
334 | |
335 | .p2align 4 |
336 | L(exit_tail13): |
337 | add $13, %rax |
338 | jmp L(StartStrcpyPart) |
339 | |
340 | .p2align 4 |
341 | L(exit_tail14): |
342 | add $14, %rax |
343 | jmp L(StartStrcpyPart) |
344 | |
345 | .p2align 4 |
346 | L(exit_tail15): |
347 | add $15, %rax |
348 | /* Falls through into L(StartStrcpyPart). */ |
349 | .p2align 4 |
350 | L(StartStrcpyPart): /* Step 2: append the source. In: rax = length of the destination string. */ |
351 | mov %rsi, %rcx /* rcx = source. */ |
352 | lea (%rdi, %rax), %rdx /* rdx = address of the terminating NUL of the destination, where the copy starts. */ |
353 | # ifdef USE_AS_STRNCAT |
354 | test %r8, %r8 |
355 | jz L(StrncatExit0) /* Count of 0: nothing to append. */ |
356 | cmp $8, %r8 |
357 | jbe L(StrncatExit8Bytes) /* Counts 1-8 are handled entirely by the short path. */ |
358 | # endif |
359 | cmpb $0, (%rcx) /* Probe source bytes 0-15; L(ExitN) copies N bytes, the last of which is the NUL. */ |
360 | jz L(Exit1) |
361 | cmpb $0, 1(%rcx) |
362 | jz L(Exit2) |
363 | cmpb $0, 2(%rcx) |
364 | jz L(Exit3) |
365 | cmpb $0, 3(%rcx) |
366 | jz L(Exit4) |
367 | cmpb $0, 4(%rcx) |
368 | jz L(Exit5) |
369 | cmpb $0, 5(%rcx) |
370 | jz L(Exit6) |
371 | cmpb $0, 6(%rcx) |
372 | jz L(Exit7) |
373 | cmpb $0, 7(%rcx) |
374 | jz L(Exit8) |
375 | cmpb $0, 8(%rcx) |
376 | jz L(Exit9) |
377 | # ifdef USE_AS_STRNCAT |
378 | cmp $16, %r8 |
379 | jb L(StrncatExit15Bytes) /* Count is 9-15 and source bytes 0-8 are non-NUL. */ |
380 | # endif |
381 | cmpb $0, 9(%rcx) |
382 | jz L(Exit10) |
383 | cmpb $0, 10(%rcx) |
384 | jz L(Exit11) |
385 | cmpb $0, 11(%rcx) |
386 | jz L(Exit12) |
387 | cmpb $0, 12(%rcx) |
388 | jz L(Exit13) |
389 | cmpb $0, 13(%rcx) |
390 | jz L(Exit14) |
391 | cmpb $0, 14(%rcx) |
392 | jz L(Exit15) |
393 | cmpb $0, 15(%rcx) |
394 | jz L(Exit16) |
395 | # ifdef USE_AS_STRNCAT |
396 | cmp $16, %r8 |
397 | je L(StrncatExit16) /* Count is exactly 16 and no NUL in 16 bytes: copy 16 bytes and terminate. */ |
398 | # define USE_AS_STRNCPY /* Defined only for strncat builds; it also enables the bounded-copy labels guarded by USE_AS_STRNCPY later in this file. */ |
399 | # endif |
400 | /* Sources longer than 16 bytes continue in the included file. NOTE(review): presumably it branches back to the CopyFrom1To16Bytes* labels defined after the include - confirm in strcpy-ssse3.S. */ |
401 | # include "strcpy-ssse3.S" |
402 | |
403 | .p2align 4 |
404 | L(CopyFrom1To16Bytes): /* NOTE(review): not referenced in this file; presumably entered from strcpy-ssse3.S with rsi = byte offset and eax = NUL mask of the next 16 source bytes - confirm. */ |
405 | add %rsi, %rdx /* Advance the destination and source cursors by rsi. */ |
406 | add %rsi, %rcx |
407 | /* The lowest set mask bit N (al = bits 0-7) selects the exit label that copies N + 1 bytes. */ |
408 | test %al, %al |
409 | jz L(ExitHigh) |
410 | test $0x01, %al |
411 | jnz L(Exit1) |
412 | test $0x02, %al |
413 | jnz L(Exit2) |
414 | test $0x04, %al |
415 | jnz L(Exit3) |
416 | test $0x08, %al |
417 | jnz L(Exit4) |
418 | test $0x10, %al |
419 | jnz L(Exit5) |
420 | test $0x20, %al |
421 | jnz L(Exit6) |
422 | test $0x40, %al |
423 | jnz L(Exit7) |
424 | movlpd (%rcx), %xmm0 /* al != 0 with bits 0-6 clear means bit 7: copy 8 bytes. */ |
425 | movlpd %xmm0, (%rdx) |
426 | mov %rdi, %rax /* Return the destination pointer. */ |
427 | ret |
428 | |
429 | .p2align 4 |
430 | L(ExitHigh): /* al == 0: examine mask bits 8-15 in ah. */ |
431 | test $0x01, %ah |
432 | jnz L(Exit9) |
433 | test $0x02, %ah |
434 | jnz L(Exit10) |
435 | test $0x04, %ah |
436 | jnz L(Exit11) |
437 | test $0x08, %ah |
438 | jnz L(Exit12) |
439 | test $0x10, %ah |
440 | jnz L(Exit13) |
441 | test $0x20, %ah |
442 | jnz L(Exit14) |
443 | test $0x40, %ah |
444 | jnz L(Exit15) |
445 | movlpd (%rcx), %xmm0 /* Bits 8-14 clear: copy all 16 bytes. */ |
446 | movlpd 8(%rcx), %xmm1 |
447 | movlpd %xmm0, (%rdx) |
448 | movlpd %xmm1, 8(%rdx) |
449 | mov %rdi, %rax /* Return the destination pointer. */ |
450 | ret |
451 | |
452 | .p2align 4 |
453 | L(StrncatExit1): /* L(StrncatExitN): store a NUL at rdx[N], then fall into L(ExitN). */ |
454 | xor %ah, %ah |
455 | movb %ah, 1(%rdx) |
456 | L(Exit1): /* L(ExitN): copy N bytes from rcx to rdx, then return rdi in rax. */ |
457 | movb (%rcx), %al |
458 | movb %al, (%rdx) |
459 | mov %rdi, %rax |
460 | ret |
461 | |
462 | .p2align 4 |
463 | L(StrncatExit2): |
464 | xor %ah, %ah |
465 | movb %ah, 2(%rdx) |
466 | L(Exit2): |
467 | movw (%rcx), %ax |
468 | movw %ax, (%rdx) |
469 | mov %rdi, %rax |
470 | ret |
471 | |
472 | .p2align 4 |
473 | L(StrncatExit3): |
474 | xor %ah, %ah |
475 | movb %ah, 3(%rdx) |
476 | L(Exit3): |
477 | movw (%rcx), %ax /* Bytes 0-1. */ |
478 | movw %ax, (%rdx) |
479 | movb 2(%rcx), %al /* Byte 2. */ |
480 | movb %al, 2(%rdx) |
481 | mov %rdi, %rax |
482 | ret |
483 | |
484 | .p2align 4 |
485 | L(StrncatExit4): |
486 | xor %ah, %ah |
487 | movb %ah, 4(%rdx) |
488 | L(Exit4): |
489 | mov (%rcx), %eax |
490 | mov %eax, (%rdx) |
491 | mov %rdi, %rax |
492 | ret |
493 | |
494 | .p2align 4 |
495 | L(StrncatExit5): |
496 | xor %ah, %ah |
497 | movb %ah, 5(%rdx) |
498 | L(Exit5): |
499 | mov (%rcx), %eax /* Bytes 0-3. */ |
500 | mov %eax, (%rdx) |
501 | movb 4(%rcx), %al /* Byte 4. */ |
502 | movb %al, 4(%rdx) |
503 | mov %rdi, %rax |
504 | ret |
505 | |
506 | .p2align 4 |
507 | L(StrncatExit6): |
508 | xor %ah, %ah |
509 | movb %ah, 6(%rdx) |
510 | L(Exit6): |
511 | mov (%rcx), %eax /* Bytes 0-3. */ |
512 | mov %eax, (%rdx) |
513 | movw 4(%rcx), %ax /* Bytes 4-5. */ |
514 | movw %ax, 4(%rdx) |
515 | mov %rdi, %rax |
516 | ret |
517 | |
518 | .p2align 4 |
519 | L(StrncatExit7): |
520 | xor %ah, %ah |
521 | movb %ah, 7(%rdx) |
522 | L(Exit7): |
523 | mov (%rcx), %eax /* Bytes 0-3. */ |
524 | mov %eax, (%rdx) |
525 | mov 3(%rcx), %eax /* Bytes 3-6; overlaps the first move by one byte. */ |
526 | mov %eax, 3(%rdx) |
527 | mov %rdi, %rax |
528 | ret |
529 | |
530 | .p2align 4 |
531 | L(StrncatExit8): |
532 | xor %ah, %ah |
533 | movb %ah, 8(%rdx) |
534 | L(Exit8): |
535 | movlpd (%rcx), %xmm0 /* movlpd moves 8 bytes through the low half of the xmm register. */ |
536 | movlpd %xmm0, (%rdx) |
537 | mov %rdi, %rax |
538 | ret |
539 | |
540 | .p2align 4 |
541 | L(StrncatExit9): |
542 | xor %ah, %ah |
543 | movb %ah, 9(%rdx) |
544 | L(Exit9): |
545 | movlpd (%rcx), %xmm0 /* Bytes 0-7. */ |
546 | movlpd %xmm0, (%rdx) |
547 | movb 8(%rcx), %al /* Byte 8. */ |
548 | movb %al, 8(%rdx) |
549 | mov %rdi, %rax |
550 | ret |
551 | |
552 | .p2align 4 |
553 | L(StrncatExit10): |
554 | xor %ah, %ah |
555 | movb %ah, 10(%rdx) |
556 | L(Exit10): |
557 | movlpd (%rcx), %xmm0 /* Bytes 0-7. */ |
558 | movlpd %xmm0, (%rdx) |
559 | movw 8(%rcx), %ax /* Bytes 8-9. */ |
560 | movw %ax, 8(%rdx) |
561 | mov %rdi, %rax |
562 | ret |
563 | |
564 | .p2align 4 |
565 | L(StrncatExit11): |
566 | xor %ah, %ah |
567 | movb %ah, 11(%rdx) |
568 | L(Exit11): |
569 | movlpd (%rcx), %xmm0 /* Bytes 0-7. */ |
570 | movlpd %xmm0, (%rdx) |
571 | mov 7(%rcx), %eax /* Bytes 7-10; overlaps byte 7. */ |
572 | mov %eax, 7(%rdx) |
573 | mov %rdi, %rax |
574 | ret |
575 | |
576 | .p2align 4 |
577 | L(StrncatExit12): |
578 | xor %ah, %ah |
579 | movb %ah, 12(%rdx) |
580 | L(Exit12): |
581 | movlpd (%rcx), %xmm0 /* Bytes 0-7. */ |
582 | movlpd %xmm0, (%rdx) |
583 | mov 8(%rcx), %eax /* Bytes 8-11. */ |
584 | mov %eax, 8(%rdx) |
585 | mov %rdi, %rax |
586 | ret |
587 | |
588 | .p2align 4 |
589 | L(StrncatExit13): |
590 | xor %ah, %ah |
591 | movb %ah, 13(%rdx) |
592 | L(Exit13): |
593 | movlpd (%rcx), %xmm0 /* Bytes 0-7. */ |
594 | movlpd %xmm0, (%rdx) |
595 | movlpd 5(%rcx), %xmm1 /* Bytes 5-12; overlaps bytes 5-7. */ |
596 | movlpd %xmm1, 5(%rdx) |
597 | mov %rdi, %rax |
598 | ret |
599 | |
600 | .p2align 4 |
601 | L(StrncatExit14): |
602 | xor %ah, %ah |
603 | movb %ah, 14(%rdx) |
604 | L(Exit14): |
605 | movlpd (%rcx), %xmm0 /* Bytes 0-7. */ |
606 | movlpd %xmm0, (%rdx) |
607 | movlpd 6(%rcx), %xmm1 /* Bytes 6-13; overlaps bytes 6-7. */ |
608 | movlpd %xmm1, 6(%rdx) |
609 | mov %rdi, %rax |
610 | ret |
611 | |
612 | .p2align 4 |
613 | L(StrncatExit15): |
614 | xor %ah, %ah |
615 | movb %ah, 15(%rdx) |
616 | L(Exit15): |
617 | movlpd (%rcx), %xmm0 /* Bytes 0-7. */ |
618 | movlpd %xmm0, (%rdx) |
619 | movlpd 7(%rcx), %xmm1 /* Bytes 7-14; overlaps byte 7. */ |
620 | movlpd %xmm1, 7(%rdx) |
621 | mov %rdi, %rax |
622 | ret |
623 | |
624 | .p2align 4 |
625 | L(StrncatExit16): |
626 | xor %ah, %ah |
627 | movb %ah, 16(%rdx) |
628 | L(Exit16): |
629 | movlpd (%rcx), %xmm0 /* Bytes 0-7. */ |
630 | movlpd 8(%rcx), %xmm1 /* Bytes 8-15. */ |
631 | movlpd %xmm0, (%rdx) |
632 | movlpd %xmm1, 8(%rdx) |
633 | mov %rdi, %rax |
634 | ret |
635 | |
636 | # ifdef USE_AS_STRNCPY /* Bounded-copy tails; in this file USE_AS_STRNCPY is defined only under USE_AS_STRNCAT. */ |
637 | |
638 | .p2align 4 |
639 | L(CopyFrom1To16BytesCase2): /* NOTE(review): only reached here via L(CopyFrom1To16BytesCase2OrCase3) with rax != 0; other entries presumably come from strcpy-ssse3.S - confirm the register contract there. */ |
640 | add $16, %r8 /* NOTE(review): presumably turns r8 into the number of bytes still allowed (compared against 1-15 below) - confirm. */ |
641 | add %rsi, %rcx /* Advance the source cursor by rsi. */ |
642 | lea (%rsi, %rdx), %rsi /* Park the advanced destination cursor in rsi; rdx is used as scratch next. */ |
643 | lea -9(%r8), %rdx |
644 | and $1<<7, %dh /* Keep only bit 15 of (r8 - 9); for the small r8 values compared below it is set when r8 < 9. */ |
645 | or %al, %dh /* ... combined with the mask bits for bytes 0-7. */ |
646 | test %dh, %dh |
647 | lea (%rsi), %rdx /* Restore rdx = destination cursor; lea leaves the flags intact. */ |
648 | jz L(ExitHighCase2) /* Neither condition holds: nothing ends within bytes 0-7. */ |
649 | /* For each byte position in turn: mask bit set -> L(ExitN); otherwise limit reached -> L(StrncatExitN). */ |
650 | test $0x01, %al |
651 | jnz L(Exit1) |
652 | cmp $1, %r8 |
653 | je L(StrncatExit1) |
654 | test $0x02, %al |
655 | jnz L(Exit2) |
656 | cmp $2, %r8 |
657 | je L(StrncatExit2) |
658 | test $0x04, %al |
659 | jnz L(Exit3) |
660 | cmp $3, %r8 |
661 | je L(StrncatExit3) |
662 | test $0x08, %al |
663 | jnz L(Exit4) |
664 | cmp $4, %r8 |
665 | je L(StrncatExit4) |
666 | test $0x10, %al |
667 | jnz L(Exit5) |
668 | cmp $5, %r8 |
669 | je L(StrncatExit5) |
670 | test $0x20, %al |
671 | jnz L(Exit6) |
672 | cmp $6, %r8 |
673 | je L(StrncatExit6) |
674 | test $0x40, %al |
675 | jnz L(Exit7) |
676 | cmp $7, %r8 |
677 | je L(StrncatExit7) |
678 | movlpd (%rcx), %xmm0 /* Copy 8 bytes. */ |
679 | movlpd %xmm0, (%rdx) |
680 | lea 7(%rdx), %rax |
681 | cmpb $1, (%rax) /* CF = 1 exactly when the 8th byte just copied is NUL. */ |
682 | sbb $-1, %rax /* rax += 1 - CF: rdx + 7 if that byte is NUL, otherwise rdx + 8. */ |
683 | xor %cl, %cl |
684 | movb %cl, (%rax) /* Store the terminating NUL at that address. */ |
685 | mov %rdi, %rax /* Return the destination pointer. */ |
686 | ret |
687 | |
688 | .p2align 4 |
689 | L(ExitHighCase2): /* Same scheme for byte positions 8-14, using the mask bits in ah. */ |
690 | test $0x01, %ah |
691 | jnz L(Exit9) |
692 | cmp $9, %r8 |
693 | je L(StrncatExit9) |
694 | test $0x02, %ah |
695 | jnz L(Exit10) |
696 | cmp $10, %r8 |
697 | je L(StrncatExit10) |
698 | test $0x04, %ah |
699 | jnz L(Exit11) |
700 | cmp $11, %r8 |
701 | je L(StrncatExit11) |
702 | test $0x8, %ah |
703 | jnz L(Exit12) |
704 | cmp $12, %r8 |
705 | je L(StrncatExit12) |
706 | test $0x10, %ah |
707 | jnz L(Exit13) |
708 | cmp $13, %r8 |
709 | je L(StrncatExit13) |
710 | test $0x20, %ah |
711 | jnz L(Exit14) |
712 | cmp $14, %r8 |
713 | je L(StrncatExit14) |
714 | test $0x40, %ah |
715 | jnz L(Exit15) |
716 | cmp $15, %r8 |
717 | je L(StrncatExit15) |
718 | movlpd (%rcx), %xmm0 /* Copy all 16 bytes. NOTE(review): no separate terminator is stored here; presumably mask bit 15 is set so byte 15 is the NUL - confirm. */ |
719 | movlpd %xmm0, (%rdx) |
720 | movlpd 8(%rcx), %xmm1 |
721 | movlpd %xmm1, 8(%rdx) |
722 | mov %rdi, %rax /* Return the destination pointer. */ |
723 | ret |
724 | |
725 | L(CopyFrom1To16BytesCase2OrCase3): /* rax != 0 -> Case2; rax == 0 -> fall into Case3. NOTE(review): rax is presumably the NUL mask set up by strcpy-ssse3.S - confirm. */ |
726 | test %rax, %rax |
727 | jnz L(CopyFrom1To16BytesCase2) |
728 | |
729 | .p2align 4 |
730 | L(CopyFrom1To16BytesCase3): /* Length-limited tail without NUL tests: copy r8 bytes (after the adjustment below) and append a NUL. */ |
731 | add $16, %r8 /* NOTE(review): presumably turns r8 into the number of bytes still allowed (compared against 1-15 below) - confirm. */ |
732 | add %rsi, %rdx /* Advance the destination and source cursors by rsi. */ |
733 | add %rsi, %rcx |
734 | |
735 | cmp $8, %r8 |
736 | ja L(ExitHighCase3) /* More than 8 bytes to copy. */ |
737 | cmp $1, %r8 |
738 | je L(StrncatExit1) |
739 | cmp $2, %r8 |
740 | je L(StrncatExit2) |
741 | cmp $3, %r8 |
742 | je L(StrncatExit3) |
743 | cmp $4, %r8 |
744 | je L(StrncatExit4) |
745 | cmp $5, %r8 |
746 | je L(StrncatExit5) |
747 | cmp $6, %r8 |
748 | je L(StrncatExit6) |
749 | cmp $7, %r8 |
750 | je L(StrncatExit7) |
751 | movlpd (%rcx), %xmm0 /* r8 is not 1-7 and not above 8: copy 8 bytes (NOTE(review): assumes r8 is never 0 here - confirm). */ |
752 | movlpd %xmm0, (%rdx) |
753 | xor %ah, %ah |
754 | movb %ah, 8(%rdx) /* Terminating NUL at index 8. */ |
755 | mov %rdi, %rax /* Return the destination pointer. */ |
756 | ret |
757 | |
758 | .p2align 4 |
759 | L(ExitHighCase3): /* r8 > 8: dispatch on 9-15, otherwise copy 16 bytes. */ |
760 | cmp $9, %r8 |
761 | je L(StrncatExit9) |
762 | cmp $10, %r8 |
763 | je L(StrncatExit10) |
764 | cmp $11, %r8 |
765 | je L(StrncatExit11) |
766 | cmp $12, %r8 |
767 | je L(StrncatExit12) |
768 | cmp $13, %r8 |
769 | je L(StrncatExit13) |
770 | cmp $14, %r8 |
771 | je L(StrncatExit14) |
772 | cmp $15, %r8 |
773 | je L(StrncatExit15) |
774 | movlpd (%rcx), %xmm0 /* Copy 16 bytes. */ |
775 | movlpd %xmm0, (%rdx) |
776 | movlpd 8(%rcx), %xmm1 |
777 | movlpd %xmm1, 8(%rdx) |
778 | xor %ah, %ah |
779 | movb %ah, 16(%rdx) /* Terminating NUL at index 16. */ |
780 | mov %rdi, %rax /* Return the destination pointer. */ |
781 | ret |
782 | |
783 | .p2align 4 |
784 | L(StrncatExit0): /* Count of 0: return the destination without writing anything. */ |
785 | mov %rdi, %rax |
786 | ret |
787 | |
788 | .p2align 4 |
789 | L(StrncatExit15Bytes): /* Entered from L(StartStrcpyPart) with 9 <= r8 <= 15 and source bytes 0-8 known to be non-NUL. */ |
790 | cmp $9, %r8 /* Alternate "limit reached?" and "next source byte NUL?" for positions 9-13. */ |
791 | je L(StrncatExit9) |
792 | cmpb $0, 9(%rcx) |
793 | jz L(Exit10) |
794 | cmp $10, %r8 |
795 | je L(StrncatExit10) |
796 | cmpb $0, 10(%rcx) |
797 | jz L(Exit11) |
798 | cmp $11, %r8 |
799 | je L(StrncatExit11) |
800 | cmpb $0, 11(%rcx) |
801 | jz L(Exit12) |
802 | cmp $12, %r8 |
803 | je L(StrncatExit12) |
804 | cmpb $0, 12(%rcx) |
805 | jz L(Exit13) |
806 | cmp $13, %r8 |
807 | je L(StrncatExit13) |
808 | cmpb $0, 13(%rcx) |
809 | jz L(Exit14) |
810 | cmp $14, %r8 |
811 | je L(StrncatExit14) |
812 | movlpd (%rcx), %xmm0 /* r8 == 15 and bytes 0-13 are non-NUL: copy 15 bytes with two overlapping 8-byte moves. */ |
813 | movlpd %xmm0, (%rdx) |
814 | movlpd 7(%rcx), %xmm1 |
815 | movlpd %xmm1, 7(%rdx) |
816 | lea 14(%rdx), %rax |
817 | cmpb $1, (%rax) /* CF = 1 exactly when the 15th byte just copied is NUL. */ |
818 | sbb $-1, %rax /* rax += 1 - CF: rdx + 14 if that byte is NUL, otherwise rdx + 15. */ |
819 | xor %cl, %cl |
820 | movb %cl, (%rax) /* Store the terminating NUL at that address. */ |
821 | mov %rdi, %rax /* Return the destination pointer. */ |
822 | ret |
823 | |
824 | .p2align 4 |
825 | L(StrncatExit8Bytes): /* Entered from L(StartStrcpyPart) with 1 <= r8 <= 8; no source byte has been examined yet. */ |
826 | cmpb $0, (%rcx) /* Alternate "source byte NUL?" and "limit reached?" for positions 0-6. */ |
827 | jz L(Exit1) |
828 | cmp $1, %r8 |
829 | je L(StrncatExit1) |
830 | cmpb $0, 1(%rcx) |
831 | jz L(Exit2) |
832 | cmp $2, %r8 |
833 | je L(StrncatExit2) |
834 | cmpb $0, 2(%rcx) |
835 | jz L(Exit3) |
836 | cmp $3, %r8 |
837 | je L(StrncatExit3) |
838 | cmpb $0, 3(%rcx) |
839 | jz L(Exit4) |
840 | cmp $4, %r8 |
841 | je L(StrncatExit4) |
842 | cmpb $0, 4(%rcx) |
843 | jz L(Exit5) |
844 | cmp $5, %r8 |
845 | je L(StrncatExit5) |
846 | cmpb $0, 5(%rcx) |
847 | jz L(Exit6) |
848 | cmp $6, %r8 |
849 | je L(StrncatExit6) |
850 | cmpb $0, 6(%rcx) |
851 | jz L(Exit7) |
852 | cmp $7, %r8 |
853 | je L(StrncatExit7) |
854 | movlpd (%rcx), %xmm0 /* r8 == 8 and bytes 0-6 are non-NUL: copy 8 bytes. */ |
855 | movlpd %xmm0, (%rdx) |
856 | lea 7(%rdx), %rax |
857 | cmpb $1, (%rax) /* CF = 1 exactly when the 8th byte just copied is NUL. */ |
858 | sbb $-1, %rax /* rax += 1 - CF: rdx + 7 if that byte is NUL, otherwise rdx + 8. */ |
859 | xor %cl, %cl |
860 | movb %cl, (%rax) /* Store the terminating NUL at that address. */ |
861 | mov %rdi, %rax /* Return the destination pointer. */ |
862 | ret |
863 | |
864 | # endif /* USE_AS_STRNCPY */ |
865 | END (STRCAT) |
866 | #endif /* IS_IN (libc) */ |
867 | |