1 | /* strcpy with SSSE3 |
2 | Copyright (C) 2011-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | |
20 | #if IS_IN (libc) |
21 | |
22 | # ifndef USE_AS_STRCAT |
23 | # include <sysdep.h> |
24 | |
/* Unwind (CFI) bookkeeping that accompanies a 4-byte push of REG: the CFA
   offset grows by 4 and the save location of REG is recorded.  */
25 | # define CFI_PUSH(REG) \ |
26 | cfi_adjust_cfa_offset (4); \ |
27 | cfi_rel_offset (REG, 0) |
28 | |
/* Unwind (CFI) bookkeeping that accompanies the matching 4-byte pop of REG.  */
29 | # define CFI_POP(REG) \ |
30 | cfi_adjust_cfa_offset (-4); \ |
31 | cfi_restore (REG) |
32 | |
/* Push/pop a register and emit the corresponding CFI in a single step.  */
33 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) |
34 | # define POP(REG) popl REG; CFI_POP (REG) |
35 | |
/* Default symbol name; a file that includes this one may pre-define STRCPY.  */
36 | # ifndef STRCPY |
37 | # define STRCPY __strcpy_ssse3 |
38 | # endif |
39 | |
/* PARMS is the stack offset of the first argument once ENTRANCE has run:
   4 bytes of return address, plus 4 more in the strncpy build because
   ENTRANCE pushes %ebx there.
   RETURN / RETURN1 pop what was pushed and return.  The CFI_PUSH that
   follows each `ret' undoes the CFI effect of the POP, so the unwind
   state stays correct for the code placed after the return.  RETURN1 is
   the variant that also pops %edi.  */
40 | # ifdef USE_AS_STRNCPY |
41 | # define PARMS 8 |
42 | # define ENTRANCE PUSH (%ebx) |
43 | # define RETURN POP (%ebx); ret; CFI_PUSH (%ebx); |
44 | # define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi) |
45 | # else |
46 | # define PARMS 4 |
47 | # define ENTRANCE |
48 | # define RETURN ret |
49 | # define RETURN1 POP (%edi); ret; CFI_PUSH (%edi) |
50 | # endif |
51 | |
/* How the return value in %eax is produced.  In the stpcpy build it is
   %edx + n; otherwise it is a copy of %edi (SAVE_RESULT) or of %edx
   (SAVE_RESULT_TAIL) and the argument n is ignored.  */
52 | # ifdef USE_AS_STPCPY |
53 | # define SAVE_RESULT(n) lea n(%edx), %eax |
54 | # define SAVE_RESULT_TAIL(n) lea n(%edx), %eax |
55 | # else |
56 | # define SAVE_RESULT(n) movl %edi, %eax |
57 | # define SAVE_RESULT_TAIL(n) movl %edx, %eax |
58 | # endif |
59 | |
/* Stack offsets (relative to %esp after ENTRANCE) of the three arguments.
   LEN is only read in the strncpy build.  */
60 | # define STR1 PARMS |
61 | # define STR2 STR1+4 |
62 | # define LEN STR2+4 |
63 | |
64 | /* In this code the following instructions are used for copying: |
65 | movb - 1 byte |
66 | movw - 2 bytes |
67 | movl - 4 bytes |
68 | movlpd - 8 bytes |
69 | movaps - 16 bytes - requires 16-byte alignment |
70 | of source and destination addresses. |
71 | */ |
72 | |
/* Function entry.
   Loads edx = first stack argument (STR1) and ecx = second stack
   argument (STR2); the copy code later reads from (%ecx) and writes to
   (%edx).  In the strncpy build ebx = third argument (LEN).

   The first 16 source bytes are tested one at a time.  A NUL at byte
   index i branches to L(ExitTail<i+1>); those labels, and the
   L(StrncpyExit...) labels, are defined outside this part of the file.
   Nothing is stored to the destination in this block.  */
73 | .text |
74 | ENTRY (STRCPY) |
75 | ENTRANCE |
76 | mov STR1(%esp), %edx |
77 | mov STR2(%esp), %ecx |
78 | # ifdef USE_AS_STRNCPY |
/* strncpy: lengths of 8 or less (unsigned compare) take a separate path.  */
79 | movl LEN(%esp), %ebx |
80 | cmp $8, %ebx |
81 | jbe L(StrncpyExit8Bytes) |
82 | # endif |
/* NUL scan of source bytes 0..7.  */
83 | cmpb $0, (%ecx) |
84 | jz L(ExitTail1) |
85 | cmpb $0, 1(%ecx) |
86 | jz L(ExitTail2) |
87 | cmpb $0, 2(%ecx) |
88 | jz L(ExitTail3) |
89 | cmpb $0, 3(%ecx) |
90 | jz L(ExitTail4) |
91 | cmpb $0, 4(%ecx) |
92 | jz L(ExitTail5) |
93 | cmpb $0, 5(%ecx) |
94 | jz L(ExitTail6) |
95 | cmpb $0, 6(%ecx) |
96 | jz L(ExitTail7) |
97 | cmpb $0, 7(%ecx) |
98 | jz L(ExitTail8) |
99 | # ifdef USE_AS_STRNCPY |
/* strncpy: no NUL in the first 8 bytes and fewer than 16 bytes requested.  */
100 | cmp $16, %ebx |
101 | jb L(StrncpyExit15Bytes) |
102 | # endif |
/* NUL scan of source bytes 8..14.  */
103 | cmpb $0, 8(%ecx) |
104 | jz L(ExitTail9) |
105 | cmpb $0, 9(%ecx) |
106 | jz L(ExitTail10) |
107 | cmpb $0, 10(%ecx) |
108 | jz L(ExitTail11) |
109 | cmpb $0, 11(%ecx) |
110 | jz L(ExitTail12) |
111 | cmpb $0, 12(%ecx) |
112 | jz L(ExitTail13) |
113 | cmpb $0, 13(%ecx) |
114 | jz L(ExitTail14) |
115 | cmpb $0, 14(%ecx) |
116 | jz L(ExitTail15) |
117 | # ifdef USE_AS_STRNCPY |
/* strncpy: with exactly 16 bytes requested, byte 15 is not examined and
   the 16-byte exit is taken directly.  */
118 | cmp $16, %ebx |
119 | je L(ExitTail16) |
120 | # endif |
121 | cmpb $0, 15(%ecx) |
122 | jz L(ExitTail16) |
123 | |
/* No NUL in the first 16 bytes.  Save %edi and keep the original
   destination pointer in it; SAVE_RESULT copies %edi to %eax in the
   non-stpcpy builds.  */
124 | PUSH (%edi) |
125 | mov %edx, %edi |
/* Closes the `ifndef USE_AS_STRCAT' opened near the top of the file:
   everything above is omitted when USE_AS_STRCAT is defined (presumably
   the including strcat source supplies its own entry code -- confirm
   there).  */
126 | # endif |
/* Copy the first 16 bytes with two unaligned 8-byte moves, check the
   first 16-byte aligned source block after ecx for a NUL, then advance
   edx to the next 16-byte boundary and move ecx by the same distance.
   On exit from this block: edx is 16-byte aligned, eax = ecx & 15,
   esi = 0, and ZF is set when the source is 16-byte aligned as well.  */
127 | PUSH (%esi) |
128 | # ifdef USE_AS_STRNCPY |
/* strncpy: ebx = ebx - 16 + (ecx & 15).  */
129 | mov %ecx, %esi |
130 | sub $16, %ebx |
131 | and $0xf, %esi |
132 | |
133 | /* add 16 bytes ecx_offset to ebx */ |
134 | |
135 | add %esi, %ebx |
136 | # endif |
/* esi = first 16-byte aligned address strictly above ecx.  */
137 | lea 16(%ecx), %esi |
138 | and $-16, %esi |
/* xmm0 = 0.  It is the NUL comparand throughout: every pcmpeqb into xmm0
   is followed by a test of its mask, and the fall-through path is only
   taken when the mask is zero, so xmm0 is all-zero again there.  */
139 | pxor %xmm0, %xmm0 |
/* Copy source bytes 0..7.  */
140 | movlpd (%ecx), %xmm1 |
141 | movlpd %xmm1, (%edx) |
142 | |
/* Compare the aligned block at esi against zero; interleaved with the
   copy of source bytes 8..15.  */
143 | pcmpeqb (%esi), %xmm0 |
144 | movlpd 8(%ecx), %xmm1 |
145 | movlpd %xmm1, 8(%edx) |
146 | |
/* eax = bit mask of NUL bytes in that block; esi = its offset from ecx
   (1..16).  */
147 | pmovmskb %xmm0, %eax |
148 | sub %ecx, %esi |
149 | |
150 | # ifdef USE_AS_STRNCPY |
/* strncpy: leave for the length-limited exit (defined outside this part
   of the file) when the counter reaches zero or underflows.  */
151 | sub $16, %ebx |
152 | jbe L(CopyFrom1To16BytesCase2OrCase3) |
153 | # endif |
154 | test %eax, %eax |
155 | jnz L(CopyFrom1To16Bytes) |
156 | |
/* Round edx up to the next 16-byte boundary (always moving forward by
   1..16); eax = old edx - new edx, a value in -16..-1.  */
157 | mov %edx, %eax |
158 | lea 16(%edx), %edx |
159 | and $-16, %edx |
160 | sub %edx, %eax |
161 | |
162 | # ifdef USE_AS_STRNCPY |
/* strncpy: esi = esi + eax - 1.  If that is negative (sign bit set) ebx
   is left alone, otherwise 16 is added back to ebx.  */
163 | add %eax, %esi |
164 | lea -1(%esi), %esi |
165 | and $1<<31, %esi |
166 | test %esi, %esi |
167 | jnz L(ContinueCopy) |
168 | lea 16(%ebx), %ebx |
169 | |
170 | L(ContinueCopy): |
171 | # endif |
/* Advance ecx by the same distance as edx; eax = new ecx & 15.
   `mov $0' is used to clear esi because mov leaves the flags untouched:
   the jz below still tests the result of the `and'.  */
172 | sub %eax, %ecx |
173 | mov %ecx, %eax |
174 | and $0xf, %eax |
175 | mov $0, %esi |
176 | |
177 | /* case: ecx_offset == edx_offset */ |
178 | |
179 | jz L(Align16Both) |
180 | |
/* Dispatch on eax, the source misalignment relative to a 16-byte
   boundary.  The zero case has already branched to L(Align16Both), so
   eax is 1..15 here; L(Shl<k>) handles a source that is k bytes past a
   16-byte boundary.  Values 8..15 go through L(ShlHigh8).  */
181 | cmp $8, %eax |
182 | jae L(ShlHigh8) |
183 | cmp $1, %eax |
184 | je L(Shl1) |
185 | cmp $2, %eax |
186 | je L(Shl2) |
187 | cmp $3, %eax |
188 | je L(Shl3) |
189 | cmp $4, %eax |
190 | je L(Shl4) |
191 | cmp $5, %eax |
192 | je L(Shl5) |
193 | cmp $6, %eax |
194 | je L(Shl6) |
195 | jmp L(Shl7) |
196 | |
197 | L(ShlHigh8): |
/* The flags still come from `cmp $8, %eax' above, so this je catches
   eax == 8 without another compare.  */
198 | je L(Shl8) |
199 | cmp $9, %eax |
200 | je L(Shl9) |
201 | cmp $10, %eax |
202 | je L(Shl10) |
203 | cmp $11, %eax |
204 | je L(Shl11) |
205 | cmp $12, %eax |
206 | je L(Shl12) |
207 | cmp $13, %eax |
208 | je L(Shl13) |
209 | cmp $14, %eax |
210 | je L(Shl14) |
211 | jmp L(Shl15) |
212 | |
/* Source (ecx) and destination (edx) are both 16-byte aligned; esi is a
   running byte offset that starts at 0 and grows by 16 per step.
   Unrolled pipeline: each step loads the next 16-byte block, stores the
   previously loaded (already NUL-checked) block, and checks the new one.
   A NUL in the new block branches to L(CopyFrom1To16Bytes) (defined
   outside this part of the file) with esi holding the offset of that
   block, which has not been stored yet.  xmm0 is the all-zero comparand;
   it is all-zero again whenever the jnz falls through.  */
213 | L(Align16Both): |
214 | movaps (%ecx), %xmm1 |
215 | movaps 16(%ecx), %xmm2 |
216 | movaps %xmm1, (%edx) |
217 | pcmpeqb %xmm2, %xmm0 |
218 | pmovmskb %xmm0, %eax |
219 | lea 16(%esi), %esi |
220 | # ifdef USE_AS_STRNCPY |
/* strncpy: 16 is taken off the counter ebx per block; zero or underflow
   leaves for the length-limited exit.  */
221 | sub $16, %ebx |
222 | jbe L(CopyFrom1To16BytesCase2OrCase3) |
223 | # endif |
224 | test %eax, %eax |
225 | jnz L(CopyFrom1To16Bytes) |
226 | |
227 | movaps 16(%ecx, %esi), %xmm3 |
228 | movaps %xmm2, (%edx, %esi) |
229 | pcmpeqb %xmm3, %xmm0 |
230 | pmovmskb %xmm0, %eax |
231 | lea 16(%esi), %esi |
232 | # ifdef USE_AS_STRNCPY |
233 | sub $16, %ebx |
234 | jbe L(CopyFrom1To16BytesCase2OrCase3) |
235 | # endif |
236 | test %eax, %eax |
237 | jnz L(CopyFrom1To16Bytes) |
238 | |
239 | movaps 16(%ecx, %esi), %xmm4 |
240 | movaps %xmm3, (%edx, %esi) |
241 | pcmpeqb %xmm4, %xmm0 |
242 | pmovmskb %xmm0, %eax |
243 | lea 16(%esi), %esi |
244 | # ifdef USE_AS_STRNCPY |
245 | sub $16, %ebx |
246 | jbe L(CopyFrom1To16BytesCase2OrCase3) |
247 | # endif |
248 | test %eax, %eax |
249 | jnz L(CopyFrom1To16Bytes) |
250 | |
251 | movaps 16(%ecx, %esi), %xmm1 |
252 | movaps %xmm4, (%edx, %esi) |
253 | pcmpeqb %xmm1, %xmm0 |
254 | pmovmskb %xmm0, %eax |
255 | lea 16(%esi), %esi |
256 | # ifdef USE_AS_STRNCPY |
257 | sub $16, %ebx |
258 | jbe L(CopyFrom1To16BytesCase2OrCase3) |
259 | # endif |
260 | test %eax, %eax |
261 | jnz L(CopyFrom1To16Bytes) |
262 | |
263 | movaps 16(%ecx, %esi), %xmm2 |
264 | movaps %xmm1, (%edx, %esi) |
265 | pcmpeqb %xmm2, %xmm0 |
266 | pmovmskb %xmm0, %eax |
267 | lea 16(%esi), %esi |
268 | # ifdef USE_AS_STRNCPY |
269 | sub $16, %ebx |
270 | jbe L(CopyFrom1To16BytesCase2OrCase3) |
271 | # endif |
272 | test %eax, %eax |
273 | jnz L(CopyFrom1To16Bytes) |
274 | |
275 | movaps 16(%ecx, %esi), %xmm3 |
276 | movaps %xmm2, (%edx, %esi) |
277 | pcmpeqb %xmm3, %xmm0 |
278 | pmovmskb %xmm0, %eax |
279 | lea 16(%esi), %esi |
280 | # ifdef USE_AS_STRNCPY |
281 | sub $16, %ebx |
282 | jbe L(CopyFrom1To16BytesCase2OrCase3) |
283 | # endif |
284 | test %eax, %eax |
285 | jnz L(CopyFrom1To16Bytes) |
286 | |
/* Store the last checked block, then prepare the 64-byte loop:
   ecx = (ecx + esi + 16) rounded down to a 64-byte boundary, and edx is
   moved by the same distance so both pointers stay in step
   (eax = old ecx - new ecx).  */
287 | movaps %xmm3, (%edx, %esi) |
288 | mov %ecx, %eax |
289 | lea 16(%ecx, %esi), %ecx |
290 | and $-0x40, %ecx |
291 | sub %ecx, %eax |
292 | sub %eax, %edx |
293 | # ifdef USE_AS_STRNCPY |
/* strncpy: re-bias the counter for the 64-byte loop (ebx += eax + 112).
   NOTE(review): the constant 112 is not derived here -- confirm against
   the unrolled steps above.  */
294 | lea 112(%ebx, %eax), %ebx |
295 | # endif |
/* esi = -64 for the loop exit paths: L(Aligned64Leave) is entered after
   ecx/edx were advanced by 64, so -64 is the offset of the first block of
   the 64-byte group relative to them.  */
296 | mov $-0x40, %esi |
297 | |
/* Main loop for the mutually aligned case: 64 bytes per iteration.
   Four aligned 16-byte blocks are loaded; xmm4..xmm7 keep them intact
   (xmm4 = block 0, xmm5 = block 1, xmm6 = block 2, xmm7 = block 3) while
   xmm2/xmm3 are folded with pminub into a byte-wise minimum of all four.
   A zero byte in that minimum means at least one block contains a NUL.
   ecx and edx are advanced by 64 before the test, which is why the
   stores use negative offsets.  Nothing is stored when a NUL is found;
   L(Aligned64Leave) handles that group block by block.  */
298 | L(Aligned64Loop): |
299 | movaps (%ecx), %xmm2 |
300 | movaps 32(%ecx), %xmm3 |
301 | movaps %xmm2, %xmm4 |
302 | movaps 16(%ecx), %xmm5 |
303 | movaps %xmm3, %xmm6 |
304 | movaps 48(%ecx), %xmm7 |
305 | pminub %xmm5, %xmm2 |
306 | pminub %xmm7, %xmm3 |
307 | pminub %xmm2, %xmm3 |
308 | lea 64(%edx), %edx |
/* xmm0 is all-zero here, so this marks the zero bytes of the minimum.  */
309 | pcmpeqb %xmm0, %xmm3 |
310 | lea 64(%ecx), %ecx |
311 | pmovmskb %xmm3, %eax |
312 | # ifdef USE_AS_STRNCPY |
/* strncpy: 64 is taken off the counter per iteration; zero or underflow
   leaves for the length-limited exit (defined outside this part of the
   file).  */
313 | sub $64, %ebx |
314 | jbe L(StrncpyLeaveCase2OrCase3) |
315 | # endif |
316 | test %eax, %eax |
317 | jnz L(Aligned64Leave) |
318 | movaps %xmm4, -64(%edx) |
319 | movaps %xmm5, -48(%edx) |
320 | movaps %xmm6, -32(%edx) |
321 | movaps %xmm7, -16(%edx) |
322 | jmp L(Aligned64Loop) |
323 | |
/* The 64-byte group just loaded (xmm4..xmm7, in source order) contains a
   NUL.  Find the first block that holds it: each block is compared with
   the all-zero xmm0; blocks found NUL-free are stored, and control passes
   to L(CopyFrom1To16Bytes) (defined outside this part of the file) with
   eax = NUL mask of the block and esi = offset of that block relative to
   the already advanced ecx/edx (-64, -48, -32 or -16).
   `lea' is used for the esi/ebx updates because it does not modify the
   flags, so it may sit between a `test' and its `jnz'.  */
324 | L(Aligned64Leave): |
325 | # ifdef USE_AS_STRNCPY |
/* strncpy: give 48 back to the counter; 16 is taken off again for each
   further block examined below.  */
326 | lea 48(%ebx), %ebx |
327 | # endif |
328 | pcmpeqb %xmm4, %xmm0 |
329 | pmovmskb %xmm0, %eax |
330 | test %eax, %eax |
331 | jnz L(CopyFrom1To16Bytes) |
332 | |
333 | pcmpeqb %xmm5, %xmm0 |
334 | # ifdef USE_AS_STRNCPY |
335 | lea -16(%ebx), %ebx |
336 | # endif |
337 | pmovmskb %xmm0, %eax |
338 | movaps %xmm4, -64(%edx) |
339 | test %eax, %eax |
340 | lea 16(%esi), %esi |
341 | jnz L(CopyFrom1To16Bytes) |
342 | |
343 | pcmpeqb %xmm6, %xmm0 |
344 | # ifdef USE_AS_STRNCPY |
345 | lea -16(%ebx), %ebx |
346 | # endif |
347 | pmovmskb %xmm0, %eax |
348 | movaps %xmm5, -48(%edx) |
349 | test %eax, %eax |
350 | lea 16(%esi), %esi |
351 | jnz L(CopyFrom1To16Bytes) |
352 | |
/* The first three blocks are NUL-free, so the NUL is in the last one:
   no test is needed and the jump is unconditional.  */
353 | movaps %xmm6, -32(%edx) |
354 | pcmpeqb %xmm7, %xmm0 |
355 | # ifdef USE_AS_STRNCPY |
356 | lea -16(%ebx), %ebx |
357 | # endif |
358 | pmovmskb %xmm0, %eax |
359 | lea 16(%esi), %esi |
360 | jmp L(CopyFrom1To16Bytes) |
361 | |
/* Source is 1 byte past a 16-byte boundary (ecx & 15 == 1); edx is
   16-byte aligned.  All movaps loads therefore use the aligned addresses
   ecx-1, ecx+15, ecx+31, ...  Each 16-byte destination block is built
   with palignr $1 from two neighbouring aligned source blocks.
   xmm0 is the all-zero NUL comparand; it is all-zero again whenever a
   jnz on its mask falls through.  In the strncpy build ebx is the length
   counter; the L(StrncpyExit1...) / L(StrncpyLeave1) targets are defined
   outside this part of the file.  */
362 | .p2align 4 |
363 | L(Shl1): |
/* xmm1 = aligned block containing ecx, xmm2 = the block after it.  */
364 | movaps -1(%ecx), %xmm1 |
365 | movaps 15(%ecx), %xmm2 |
/* Also re-entered from the 64-byte loop once a NUL has been seen, with
   xmm1/xmm2 set up the same way.  */
366 | L(Shl1Start): |
/* Check xmm2 for a NUL; xmm3 keeps an intact copy because palignr
   overwrites xmm2.  */
367 | pcmpeqb %xmm2, %xmm0 |
368 | pmovmskb %xmm0, %eax |
369 | movaps %xmm2, %xmm3 |
370 | # ifdef USE_AS_STRNCPY |
371 | sub $16, %ebx |
372 | jbe L(StrncpyExit1Case2OrCase3) |
373 | # endif |
374 | test %eax, %eax |
375 | jnz L(Shl1LoopExit) |
376 | |
/* xmm2 = bytes 1..15 of xmm1 followed by byte 0 of xmm2, i.e. the 16
   source bytes starting at ecx.  Store them and load the next block.  */
377 | palignr $1, %xmm1, %xmm2 |
378 | movaps %xmm3, %xmm1 |
379 | movaps %xmm2, (%edx) |
380 | movaps 31(%ecx), %xmm2 |
381 | |
382 | pcmpeqb %xmm2, %xmm0 |
383 | lea 16(%edx), %edx |
384 | pmovmskb %xmm0, %eax |
385 | lea 16(%ecx), %ecx |
386 | movaps %xmm2, %xmm3 |
387 | # ifdef USE_AS_STRNCPY |
388 | sub $16, %ebx |
389 | jbe L(StrncpyExit1Case2OrCase3) |
390 | # endif |
391 | test %eax, %eax |
392 | jnz L(Shl1LoopExit) |
393 | |
394 | palignr $1, %xmm1, %xmm2 |
395 | movaps %xmm2, (%edx) |
396 | movaps 31(%ecx), %xmm2 |
397 | movaps %xmm3, %xmm1 |
398 | |
399 | pcmpeqb %xmm2, %xmm0 |
400 | lea 16(%edx), %edx |
401 | pmovmskb %xmm0, %eax |
402 | lea 16(%ecx), %ecx |
403 | movaps %xmm2, %xmm3 |
404 | # ifdef USE_AS_STRNCPY |
405 | sub $16, %ebx |
406 | jbe L(StrncpyExit1Case2OrCase3) |
407 | # endif |
408 | test %eax, %eax |
409 | jnz L(Shl1LoopExit) |
410 | |
/* From here the previous block stays in xmm3 (xmm1 is not refreshed).  */
411 | palignr $1, %xmm1, %xmm2 |
412 | movaps %xmm2, (%edx) |
413 | movaps 31(%ecx), %xmm2 |
414 | |
415 | pcmpeqb %xmm2, %xmm0 |
416 | lea 16(%edx), %edx |
417 | pmovmskb %xmm0, %eax |
418 | lea 16(%ecx), %ecx |
419 | # ifdef USE_AS_STRNCPY |
420 | sub $16, %ebx |
421 | jbe L(StrncpyExit1Case2OrCase3) |
422 | # endif |
423 | test %eax, %eax |
424 | jnz L(Shl1LoopExit) |
425 | |
/* Store the fourth block, then set up the 64-byte loop: ecx is moved to
   the next aligned source block not yet loaded and rounded down to a
   64-byte boundary; eax (0, 16, 32 or 48) is the distance rewound and
   edx is moved back by the same amount, so those bytes are copied again.
   ecx is then biased by -15 so the 15/31/47/63 offsets below address
   aligned blocks.  */
426 | palignr $1, %xmm3, %xmm2 |
427 | movaps %xmm2, (%edx) |
428 | lea 31(%ecx), %ecx |
429 | lea 16(%edx), %edx |
430 | |
431 | mov %ecx, %eax |
432 | and $-0x40, %ecx |
433 | sub %ecx, %eax |
434 | lea -15(%ecx), %ecx |
435 | sub %eax, %edx |
436 | # ifdef USE_AS_STRNCPY |
/* strncpy: give the rewound bytes back to the counter.  */
437 | add %eax, %ebx |
438 | # endif |
439 | movaps -1(%ecx), %xmm1 |
440 | |
/* 64 bytes per iteration.  xmm2..xmm5 = four consecutive aligned source
   blocks, xmm1 = the block before them.  xmm6/xmm7 are folded with pminub
   into the byte-wise minimum of all four; a zero byte there means a NUL
   is present.  xmm7 is then reused to keep an intact copy of the last
   block for the next iteration.  */
441 | L(Shl1LoopStart): |
442 | movaps 15(%ecx), %xmm2 |
443 | movaps 31(%ecx), %xmm3 |
444 | movaps %xmm3, %xmm6 |
445 | movaps 47(%ecx), %xmm4 |
446 | movaps %xmm4, %xmm7 |
447 | movaps 63(%ecx), %xmm5 |
448 | pminub %xmm2, %xmm6 |
449 | pminub %xmm5, %xmm7 |
450 | pminub %xmm6, %xmm7 |
451 | pcmpeqb %xmm0, %xmm7 |
452 | pmovmskb %xmm7, %eax |
453 | movaps %xmm5, %xmm7 |
454 | palignr $1, %xmm4, %xmm5 |
455 | test %eax, %eax |
456 | palignr $1, %xmm3, %xmm4 |
/* NUL somewhere in these 64 bytes: nothing has been stored yet; redo the
   group 16 bytes at a time (xmm1 and xmm2 are still unmodified).  */
457 | jnz L(Shl1Start) |
458 | # ifdef USE_AS_STRNCPY |
459 | sub $64, %ebx |
460 | jbe L(StrncpyLeave1) |
461 | # endif |
462 | palignr $1, %xmm2, %xmm3 |
463 | lea 64(%ecx), %ecx |
464 | palignr $1, %xmm1, %xmm2 |
465 | movaps %xmm7, %xmm1 |
466 | movaps %xmm5, 48(%edx) |
467 | movaps %xmm4, 32(%edx) |
468 | movaps %xmm3, 16(%edx) |
469 | movaps %xmm2, (%edx) |
470 | lea 64(%edx), %edx |
471 | jmp L(Shl1LoopStart) |
472 | |
/* The aligned block starting at ecx+15 contains a NUL (mask in eax).
   Copy the 15 bytes in front of it with two overlapping 8-byte moves
   (offsets 0 and 7) and continue at L(CopyFrom1To16Bytes), defined
   outside this part of the file, with esi = 15, the offset of that
   block.  */
473 | L(Shl1LoopExit): |
474 | movlpd (%ecx), %xmm0 |
475 | movlpd %xmm0, (%edx) |
476 | movlpd 7(%ecx), %xmm0 |
477 | movlpd %xmm0, 7(%edx) |
478 | mov $15, %esi |
479 | jmp L(CopyFrom1To16Bytes) |
480 | |
/* Source is 2 bytes past a 16-byte boundary (ecx & 15 == 2); edx is
   16-byte aligned.  All movaps loads therefore use the aligned addresses
   ecx-2, ecx+14, ecx+30, ...  Each 16-byte destination block is built
   with palignr $2 from two neighbouring aligned source blocks.
   xmm0 is the all-zero NUL comparand; it is all-zero again whenever a
   jnz on its mask falls through.  In the strncpy build ebx is the length
   counter; the L(StrncpyExit2...) / L(StrncpyLeave2) targets are defined
   outside this part of the file.  */
481 | .p2align 4 |
482 | L(Shl2): |
/* xmm1 = aligned block containing ecx, xmm2 = the block after it.  */
483 | movaps -2(%ecx), %xmm1 |
484 | movaps 14(%ecx), %xmm2 |
/* Also re-entered from the 64-byte loop once a NUL has been seen, with
   xmm1/xmm2 set up the same way.  */
485 | L(Shl2Start): |
/* Check xmm2 for a NUL; xmm3 keeps an intact copy because palignr
   overwrites xmm2.  */
486 | pcmpeqb %xmm2, %xmm0 |
487 | pmovmskb %xmm0, %eax |
488 | movaps %xmm2, %xmm3 |
489 | # ifdef USE_AS_STRNCPY |
490 | sub $16, %ebx |
491 | jbe L(StrncpyExit2Case2OrCase3) |
492 | # endif |
493 | test %eax, %eax |
494 | jnz L(Shl2LoopExit) |
495 | |
/* xmm2 = bytes 2..15 of xmm1 followed by bytes 0..1 of xmm2, i.e. the 16
   source bytes starting at ecx.  Store them and load the next block.  */
496 | palignr $2, %xmm1, %xmm2 |
497 | movaps %xmm3, %xmm1 |
498 | movaps %xmm2, (%edx) |
499 | movaps 30(%ecx), %xmm2 |
500 | |
501 | pcmpeqb %xmm2, %xmm0 |
502 | lea 16(%edx), %edx |
503 | pmovmskb %xmm0, %eax |
504 | lea 16(%ecx), %ecx |
505 | movaps %xmm2, %xmm3 |
506 | # ifdef USE_AS_STRNCPY |
507 | sub $16, %ebx |
508 | jbe L(StrncpyExit2Case2OrCase3) |
509 | # endif |
510 | test %eax, %eax |
511 | jnz L(Shl2LoopExit) |
512 | |
513 | palignr $2, %xmm1, %xmm2 |
514 | movaps %xmm2, (%edx) |
515 | movaps 30(%ecx), %xmm2 |
516 | movaps %xmm3, %xmm1 |
517 | |
518 | pcmpeqb %xmm2, %xmm0 |
519 | lea 16(%edx), %edx |
520 | pmovmskb %xmm0, %eax |
521 | lea 16(%ecx), %ecx |
522 | movaps %xmm2, %xmm3 |
523 | # ifdef USE_AS_STRNCPY |
524 | sub $16, %ebx |
525 | jbe L(StrncpyExit2Case2OrCase3) |
526 | # endif |
527 | test %eax, %eax |
528 | jnz L(Shl2LoopExit) |
529 | |
/* From here the previous block stays in xmm3 (xmm1 is not refreshed).  */
530 | palignr $2, %xmm1, %xmm2 |
531 | movaps %xmm2, (%edx) |
532 | movaps 30(%ecx), %xmm2 |
533 | |
534 | pcmpeqb %xmm2, %xmm0 |
535 | lea 16(%edx), %edx |
536 | pmovmskb %xmm0, %eax |
537 | lea 16(%ecx), %ecx |
538 | # ifdef USE_AS_STRNCPY |
539 | sub $16, %ebx |
540 | jbe L(StrncpyExit2Case2OrCase3) |
541 | # endif |
542 | test %eax, %eax |
543 | jnz L(Shl2LoopExit) |
544 | |
/* Store the fourth block, then set up the 64-byte loop: ecx is moved to
   the next aligned source block not yet loaded and rounded down to a
   64-byte boundary; eax (0, 16, 32 or 48) is the distance rewound and
   edx is moved back by the same amount, so those bytes are copied again.
   ecx is then biased by -14 so the 14/30/46/62 offsets below address
   aligned blocks.  */
545 | palignr $2, %xmm3, %xmm2 |
546 | movaps %xmm2, (%edx) |
547 | lea 30(%ecx), %ecx |
548 | lea 16(%edx), %edx |
549 | |
550 | mov %ecx, %eax |
551 | and $-0x40, %ecx |
552 | sub %ecx, %eax |
553 | lea -14(%ecx), %ecx |
554 | sub %eax, %edx |
555 | # ifdef USE_AS_STRNCPY |
/* strncpy: give the rewound bytes back to the counter.  */
556 | add %eax, %ebx |
557 | # endif |
558 | movaps -2(%ecx), %xmm1 |
559 | |
/* 64 bytes per iteration.  xmm2..xmm5 = four consecutive aligned source
   blocks, xmm1 = the block before them.  xmm6/xmm7 are folded with pminub
   into the byte-wise minimum of all four; a zero byte there means a NUL
   is present.  xmm7 is then reused to keep an intact copy of the last
   block for the next iteration.  */
560 | L(Shl2LoopStart): |
561 | movaps 14(%ecx), %xmm2 |
562 | movaps 30(%ecx), %xmm3 |
563 | movaps %xmm3, %xmm6 |
564 | movaps 46(%ecx), %xmm4 |
565 | movaps %xmm4, %xmm7 |
566 | movaps 62(%ecx), %xmm5 |
567 | pminub %xmm2, %xmm6 |
568 | pminub %xmm5, %xmm7 |
569 | pminub %xmm6, %xmm7 |
570 | pcmpeqb %xmm0, %xmm7 |
571 | pmovmskb %xmm7, %eax |
572 | movaps %xmm5, %xmm7 |
573 | palignr $2, %xmm4, %xmm5 |
574 | test %eax, %eax |
575 | palignr $2, %xmm3, %xmm4 |
/* NUL somewhere in these 64 bytes: nothing has been stored yet; redo the
   group 16 bytes at a time (xmm1 and xmm2 are still unmodified).  */
576 | jnz L(Shl2Start) |
577 | # ifdef USE_AS_STRNCPY |
578 | sub $64, %ebx |
579 | jbe L(StrncpyLeave2) |
580 | # endif |
581 | palignr $2, %xmm2, %xmm3 |
582 | lea 64(%ecx), %ecx |
583 | palignr $2, %xmm1, %xmm2 |
584 | movaps %xmm7, %xmm1 |
585 | movaps %xmm5, 48(%edx) |
586 | movaps %xmm4, 32(%edx) |
587 | movaps %xmm3, 16(%edx) |
588 | movaps %xmm2, (%edx) |
589 | lea 64(%edx), %edx |
590 | jmp L(Shl2LoopStart) |
591 | |
/* The aligned block starting at ecx+14 contains a NUL (mask in eax).
   Copy the 14 bytes in front of it with two overlapping 8-byte moves
   (offsets 0 and 6) and continue at L(CopyFrom1To16Bytes), defined
   outside this part of the file, with esi = 14, the offset of that
   block.  */
592 | L(Shl2LoopExit): |
593 | movlpd (%ecx), %xmm0 |
594 | movlpd 6(%ecx), %xmm1 |
595 | movlpd %xmm0, (%edx) |
596 | movlpd %xmm1, 6(%edx) |
597 | mov $14, %esi |
598 | jmp L(CopyFrom1To16Bytes) |
599 | |
/* Source is 3 bytes past a 16-byte boundary (ecx & 15 == 3); edx is
   16-byte aligned.  All movaps loads therefore use the aligned addresses
   ecx-3, ecx+13, ecx+29, ...  Each 16-byte destination block is built
   with palignr $3 from two neighbouring aligned source blocks.
   xmm0 is the all-zero NUL comparand; it is all-zero again whenever a
   jnz on its mask falls through.  In the strncpy build ebx is the length
   counter; the L(StrncpyExit3...) / L(StrncpyLeave3) targets are defined
   outside this part of the file.  */
600 | .p2align 4 |
601 | L(Shl3): |
/* xmm1 = aligned block containing ecx, xmm2 = the block after it.  */
602 | movaps -3(%ecx), %xmm1 |
603 | movaps 13(%ecx), %xmm2 |
/* Also re-entered from the 64-byte loop once a NUL has been seen, with
   xmm1/xmm2 set up the same way.  */
604 | L(Shl3Start): |
/* Check xmm2 for a NUL; xmm3 keeps an intact copy because palignr
   overwrites xmm2.  */
605 | pcmpeqb %xmm2, %xmm0 |
606 | pmovmskb %xmm0, %eax |
607 | movaps %xmm2, %xmm3 |
608 | # ifdef USE_AS_STRNCPY |
609 | sub $16, %ebx |
610 | jbe L(StrncpyExit3Case2OrCase3) |
611 | # endif |
612 | test %eax, %eax |
613 | jnz L(Shl3LoopExit) |
614 | |
/* xmm2 = bytes 3..15 of xmm1 followed by bytes 0..2 of xmm2, i.e. the 16
   source bytes starting at ecx.  Store them and load the next block.  */
615 | palignr $3, %xmm1, %xmm2 |
616 | movaps %xmm3, %xmm1 |
617 | movaps %xmm2, (%edx) |
618 | movaps 29(%ecx), %xmm2 |
619 | |
620 | pcmpeqb %xmm2, %xmm0 |
621 | lea 16(%edx), %edx |
622 | pmovmskb %xmm0, %eax |
623 | lea 16(%ecx), %ecx |
624 | movaps %xmm2, %xmm3 |
625 | # ifdef USE_AS_STRNCPY |
626 | sub $16, %ebx |
627 | jbe L(StrncpyExit3Case2OrCase3) |
628 | # endif |
629 | test %eax, %eax |
630 | jnz L(Shl3LoopExit) |
631 | |
632 | palignr $3, %xmm1, %xmm2 |
633 | movaps %xmm2, (%edx) |
634 | movaps 29(%ecx), %xmm2 |
635 | movaps %xmm3, %xmm1 |
636 | |
637 | pcmpeqb %xmm2, %xmm0 |
638 | lea 16(%edx), %edx |
639 | pmovmskb %xmm0, %eax |
640 | lea 16(%ecx), %ecx |
641 | movaps %xmm2, %xmm3 |
642 | # ifdef USE_AS_STRNCPY |
643 | sub $16, %ebx |
644 | jbe L(StrncpyExit3Case2OrCase3) |
645 | # endif |
646 | test %eax, %eax |
647 | jnz L(Shl3LoopExit) |
648 | |
/* From here the previous block stays in xmm3 (xmm1 is not refreshed).  */
649 | palignr $3, %xmm1, %xmm2 |
650 | movaps %xmm2, (%edx) |
651 | movaps 29(%ecx), %xmm2 |
652 | |
653 | pcmpeqb %xmm2, %xmm0 |
654 | lea 16(%edx), %edx |
655 | pmovmskb %xmm0, %eax |
656 | lea 16(%ecx), %ecx |
657 | # ifdef USE_AS_STRNCPY |
658 | sub $16, %ebx |
659 | jbe L(StrncpyExit3Case2OrCase3) |
660 | # endif |
661 | test %eax, %eax |
662 | jnz L(Shl3LoopExit) |
663 | |
/* Store the fourth block, then set up the 64-byte loop: ecx is moved to
   the next aligned source block not yet loaded and rounded down to a
   64-byte boundary; eax (0, 16, 32 or 48) is the distance rewound and
   edx is moved back by the same amount, so those bytes are copied again.
   ecx is then biased by -13 so the 13/29/45/61 offsets below address
   aligned blocks.  */
664 | palignr $3, %xmm3, %xmm2 |
665 | movaps %xmm2, (%edx) |
666 | lea 29(%ecx), %ecx |
667 | lea 16(%edx), %edx |
668 | |
669 | mov %ecx, %eax |
670 | and $-0x40, %ecx |
671 | sub %ecx, %eax |
672 | lea -13(%ecx), %ecx |
673 | sub %eax, %edx |
674 | # ifdef USE_AS_STRNCPY |
/* strncpy: give the rewound bytes back to the counter.  */
675 | add %eax, %ebx |
676 | # endif |
677 | movaps -3(%ecx), %xmm1 |
678 | |
/* 64 bytes per iteration.  xmm2..xmm5 = four consecutive aligned source
   blocks, xmm1 = the block before them.  xmm6/xmm7 are folded with pminub
   into the byte-wise minimum of all four; a zero byte there means a NUL
   is present.  xmm7 is then reused to keep an intact copy of the last
   block for the next iteration.  */
679 | L(Shl3LoopStart): |
680 | movaps 13(%ecx), %xmm2 |
681 | movaps 29(%ecx), %xmm3 |
682 | movaps %xmm3, %xmm6 |
683 | movaps 45(%ecx), %xmm4 |
684 | movaps %xmm4, %xmm7 |
685 | movaps 61(%ecx), %xmm5 |
686 | pminub %xmm2, %xmm6 |
687 | pminub %xmm5, %xmm7 |
688 | pminub %xmm6, %xmm7 |
689 | pcmpeqb %xmm0, %xmm7 |
690 | pmovmskb %xmm7, %eax |
691 | movaps %xmm5, %xmm7 |
692 | palignr $3, %xmm4, %xmm5 |
693 | test %eax, %eax |
694 | palignr $3, %xmm3, %xmm4 |
/* NUL somewhere in these 64 bytes: nothing has been stored yet; redo the
   group 16 bytes at a time (xmm1 and xmm2 are still unmodified).  */
695 | jnz L(Shl3Start) |
696 | # ifdef USE_AS_STRNCPY |
697 | sub $64, %ebx |
698 | jbe L(StrncpyLeave3) |
699 | # endif |
700 | palignr $3, %xmm2, %xmm3 |
701 | lea 64(%ecx), %ecx |
702 | palignr $3, %xmm1, %xmm2 |
703 | movaps %xmm7, %xmm1 |
704 | movaps %xmm5, 48(%edx) |
705 | movaps %xmm4, 32(%edx) |
706 | movaps %xmm3, 16(%edx) |
707 | movaps %xmm2, (%edx) |
708 | lea 64(%edx), %edx |
709 | jmp L(Shl3LoopStart) |
710 | |
/* The aligned block starting at ecx+13 contains a NUL (mask in eax).
   Copy the 13 bytes in front of it with two overlapping 8-byte moves
   (offsets 0 and 5) and continue at L(CopyFrom1To16Bytes), defined
   outside this part of the file, with esi = 13, the offset of that
   block.  */
711 | L(Shl3LoopExit): |
712 | movlpd (%ecx), %xmm0 |
713 | movlpd 5(%ecx), %xmm1 |
714 | movlpd %xmm0, (%edx) |
715 | movlpd %xmm1, 5(%edx) |
716 | mov $13, %esi |
717 | jmp L(CopyFrom1To16Bytes) |
718 | |
/* Source is 4 bytes past a 16-byte boundary (ecx & 15 == 4); edx is
   16-byte aligned.  All movaps loads therefore use the aligned addresses
   ecx-4, ecx+12, ecx+28, ...  Each 16-byte destination block is built
   with palignr $4 from two neighbouring aligned source blocks.
   xmm0 is the all-zero NUL comparand; it is all-zero again whenever a
   jnz on its mask falls through.  In the strncpy build ebx is the length
   counter; the L(StrncpyExit4...) / L(StrncpyLeave4) targets are defined
   outside this part of the file.  */
719 | .p2align 4 |
720 | L(Shl4): |
/* xmm1 = aligned block containing ecx, xmm2 = the block after it.  */
721 | movaps -4(%ecx), %xmm1 |
722 | movaps 12(%ecx), %xmm2 |
/* Also re-entered from the 64-byte loop once a NUL has been seen, with
   xmm1/xmm2 set up the same way.  */
723 | L(Shl4Start): |
/* Check xmm2 for a NUL; xmm3 keeps an intact copy because palignr
   overwrites xmm2.  */
724 | pcmpeqb %xmm2, %xmm0 |
725 | pmovmskb %xmm0, %eax |
726 | movaps %xmm2, %xmm3 |
727 | # ifdef USE_AS_STRNCPY |
728 | sub $16, %ebx |
729 | jbe L(StrncpyExit4Case2OrCase3) |
730 | # endif |
731 | test %eax, %eax |
732 | jnz L(Shl4LoopExit) |
733 | |
/* xmm2 = bytes 4..15 of xmm1 followed by bytes 0..3 of xmm2, i.e. the 16
   source bytes starting at ecx.  Store them and load the next block.  */
734 | palignr $4, %xmm1, %xmm2 |
735 | movaps %xmm3, %xmm1 |
736 | movaps %xmm2, (%edx) |
737 | movaps 28(%ecx), %xmm2 |
738 | |
739 | pcmpeqb %xmm2, %xmm0 |
740 | lea 16(%edx), %edx |
741 | pmovmskb %xmm0, %eax |
742 | lea 16(%ecx), %ecx |
743 | movaps %xmm2, %xmm3 |
744 | # ifdef USE_AS_STRNCPY |
745 | sub $16, %ebx |
746 | jbe L(StrncpyExit4Case2OrCase3) |
747 | # endif |
748 | test %eax, %eax |
749 | jnz L(Shl4LoopExit) |
750 | |
751 | palignr $4, %xmm1, %xmm2 |
752 | movaps %xmm2, (%edx) |
753 | movaps 28(%ecx), %xmm2 |
754 | movaps %xmm3, %xmm1 |
755 | |
756 | pcmpeqb %xmm2, %xmm0 |
757 | lea 16(%edx), %edx |
758 | pmovmskb %xmm0, %eax |
759 | lea 16(%ecx), %ecx |
760 | movaps %xmm2, %xmm3 |
761 | # ifdef USE_AS_STRNCPY |
762 | sub $16, %ebx |
763 | jbe L(StrncpyExit4Case2OrCase3) |
764 | # endif |
765 | test %eax, %eax |
766 | jnz L(Shl4LoopExit) |
767 | |
/* From here the previous block stays in xmm3 (xmm1 is not refreshed).  */
768 | palignr $4, %xmm1, %xmm2 |
769 | movaps %xmm2, (%edx) |
770 | movaps 28(%ecx), %xmm2 |
771 | |
772 | pcmpeqb %xmm2, %xmm0 |
773 | lea 16(%edx), %edx |
774 | pmovmskb %xmm0, %eax |
775 | lea 16(%ecx), %ecx |
776 | # ifdef USE_AS_STRNCPY |
777 | sub $16, %ebx |
778 | jbe L(StrncpyExit4Case2OrCase3) |
779 | # endif |
780 | test %eax, %eax |
781 | jnz L(Shl4LoopExit) |
782 | |
/* Store the fourth block, then set up the 64-byte loop: ecx is moved to
   the next aligned source block not yet loaded and rounded down to a
   64-byte boundary; eax (0, 16, 32 or 48) is the distance rewound and
   edx is moved back by the same amount, so those bytes are copied again.
   ecx is then biased by -12 so the 12/28/44/60 offsets below address
   aligned blocks.  */
783 | palignr $4, %xmm3, %xmm2 |
784 | movaps %xmm2, (%edx) |
785 | lea 28(%ecx), %ecx |
786 | lea 16(%edx), %edx |
787 | |
788 | mov %ecx, %eax |
789 | and $-0x40, %ecx |
790 | sub %ecx, %eax |
791 | lea -12(%ecx), %ecx |
792 | sub %eax, %edx |
793 | # ifdef USE_AS_STRNCPY |
/* strncpy: give the rewound bytes back to the counter.  */
794 | add %eax, %ebx |
795 | # endif |
796 | movaps -4(%ecx), %xmm1 |
797 | |
/* 64 bytes per iteration.  xmm2..xmm5 = four consecutive aligned source
   blocks, xmm1 = the block before them.  xmm6/xmm7 are folded with pminub
   into the byte-wise minimum of all four; a zero byte there means a NUL
   is present.  xmm7 is then reused to keep an intact copy of the last
   block for the next iteration.  */
798 | L(Shl4LoopStart): |
799 | movaps 12(%ecx), %xmm2 |
800 | movaps 28(%ecx), %xmm3 |
801 | movaps %xmm3, %xmm6 |
802 | movaps 44(%ecx), %xmm4 |
803 | movaps %xmm4, %xmm7 |
804 | movaps 60(%ecx), %xmm5 |
805 | pminub %xmm2, %xmm6 |
806 | pminub %xmm5, %xmm7 |
807 | pminub %xmm6, %xmm7 |
808 | pcmpeqb %xmm0, %xmm7 |
809 | pmovmskb %xmm7, %eax |
810 | movaps %xmm5, %xmm7 |
811 | palignr $4, %xmm4, %xmm5 |
812 | test %eax, %eax |
813 | palignr $4, %xmm3, %xmm4 |
/* NUL somewhere in these 64 bytes: nothing has been stored yet; redo the
   group 16 bytes at a time (xmm1 and xmm2 are still unmodified).  */
814 | jnz L(Shl4Start) |
815 | # ifdef USE_AS_STRNCPY |
816 | sub $64, %ebx |
817 | jbe L(StrncpyLeave4) |
818 | # endif |
819 | palignr $4, %xmm2, %xmm3 |
820 | lea 64(%ecx), %ecx |
821 | palignr $4, %xmm1, %xmm2 |
822 | movaps %xmm7, %xmm1 |
823 | movaps %xmm5, 48(%edx) |
824 | movaps %xmm4, 32(%edx) |
825 | movaps %xmm3, 16(%edx) |
826 | movaps %xmm2, (%edx) |
827 | lea 64(%edx), %edx |
828 | jmp L(Shl4LoopStart) |
829 | |
/* The aligned block starting at ecx+12 contains a NUL (mask in eax).
   Copy the 12 bytes in front of it (8 bytes at offset 0 plus 4 bytes at
   offset 8, using esi as scratch) and continue at
   L(CopyFrom1To16Bytes), defined outside this part of the file, with
   esi = 12, the offset of that block.  */
830 | L(Shl4LoopExit): |
831 | movlpd (%ecx), %xmm0 |
832 | movl 8(%ecx), %esi |
833 | movlpd %xmm0, (%edx) |
834 | movl %esi, 8(%edx) |
835 | mov $12, %esi |
836 | jmp L(CopyFrom1To16Bytes) |
837 | |
/* Source is 5 bytes past a 16-byte boundary (ecx & 15 == 5); edx is
   16-byte aligned.  All movaps loads therefore use the aligned addresses
   ecx-5, ecx+11, ecx+27, ...  Each 16-byte destination block is built
   with palignr $5 from two neighbouring aligned source blocks.
   xmm0 is the all-zero NUL comparand; it is all-zero again whenever a
   jnz on its mask falls through.  In the strncpy build ebx is the length
   counter; the L(StrncpyExit5...) / L(StrncpyLeave5) targets are defined
   outside this part of the file.  */
838 | .p2align 4 |
839 | L(Shl5): |
/* xmm1 = aligned block containing ecx, xmm2 = the block after it.  */
840 | movaps -5(%ecx), %xmm1 |
841 | movaps 11(%ecx), %xmm2 |
/* Also re-entered from the 64-byte loop once a NUL has been seen, with
   xmm1/xmm2 set up the same way.  */
842 | L(Shl5Start): |
/* Check xmm2 for a NUL; xmm3 keeps an intact copy because palignr
   overwrites xmm2.  */
843 | pcmpeqb %xmm2, %xmm0 |
844 | pmovmskb %xmm0, %eax |
845 | movaps %xmm2, %xmm3 |
846 | # ifdef USE_AS_STRNCPY |
847 | sub $16, %ebx |
848 | jbe L(StrncpyExit5Case2OrCase3) |
849 | # endif |
850 | test %eax, %eax |
851 | jnz L(Shl5LoopExit) |
852 | |
/* xmm2 = bytes 5..15 of xmm1 followed by bytes 0..4 of xmm2, i.e. the 16
   source bytes starting at ecx.  Store them and load the next block.  */
853 | palignr $5, %xmm1, %xmm2 |
854 | movaps %xmm3, %xmm1 |
855 | movaps %xmm2, (%edx) |
856 | movaps 27(%ecx), %xmm2 |
857 | |
858 | pcmpeqb %xmm2, %xmm0 |
859 | lea 16(%edx), %edx |
860 | pmovmskb %xmm0, %eax |
861 | lea 16(%ecx), %ecx |
862 | movaps %xmm2, %xmm3 |
863 | # ifdef USE_AS_STRNCPY |
864 | sub $16, %ebx |
865 | jbe L(StrncpyExit5Case2OrCase3) |
866 | # endif |
867 | test %eax, %eax |
868 | jnz L(Shl5LoopExit) |
869 | |
870 | palignr $5, %xmm1, %xmm2 |
871 | movaps %xmm2, (%edx) |
872 | movaps 27(%ecx), %xmm2 |
873 | movaps %xmm3, %xmm1 |
874 | |
875 | pcmpeqb %xmm2, %xmm0 |
876 | lea 16(%edx), %edx |
877 | pmovmskb %xmm0, %eax |
878 | lea 16(%ecx), %ecx |
879 | movaps %xmm2, %xmm3 |
880 | # ifdef USE_AS_STRNCPY |
881 | sub $16, %ebx |
882 | jbe L(StrncpyExit5Case2OrCase3) |
883 | # endif |
884 | test %eax, %eax |
885 | jnz L(Shl5LoopExit) |
886 | |
/* From here the previous block stays in xmm3 (xmm1 is not refreshed).  */
887 | palignr $5, %xmm1, %xmm2 |
888 | movaps %xmm2, (%edx) |
889 | movaps 27(%ecx), %xmm2 |
890 | |
891 | pcmpeqb %xmm2, %xmm0 |
892 | lea 16(%edx), %edx |
893 | pmovmskb %xmm0, %eax |
894 | lea 16(%ecx), %ecx |
895 | # ifdef USE_AS_STRNCPY |
896 | sub $16, %ebx |
897 | jbe L(StrncpyExit5Case2OrCase3) |
898 | # endif |
899 | test %eax, %eax |
900 | jnz L(Shl5LoopExit) |
901 | |
/* Store the fourth block, then set up the 64-byte loop: ecx is moved to
   the next aligned source block not yet loaded and rounded down to a
   64-byte boundary; eax (0, 16, 32 or 48) is the distance rewound and
   edx is moved back by the same amount, so those bytes are copied again.
   ecx is then biased by -11 so the 11/27/43/59 offsets below address
   aligned blocks.  */
902 | palignr $5, %xmm3, %xmm2 |
903 | movaps %xmm2, (%edx) |
904 | lea 27(%ecx), %ecx |
905 | lea 16(%edx), %edx |
906 | |
907 | mov %ecx, %eax |
908 | and $-0x40, %ecx |
909 | sub %ecx, %eax |
910 | lea -11(%ecx), %ecx |
911 | sub %eax, %edx |
912 | # ifdef USE_AS_STRNCPY |
/* strncpy: give the rewound bytes back to the counter.  */
913 | add %eax, %ebx |
914 | # endif |
915 | movaps -5(%ecx), %xmm1 |
916 | |
/* 64 bytes per iteration.  xmm2..xmm5 = four consecutive aligned source
   blocks, xmm1 = the block before them.  xmm6/xmm7 are folded with pminub
   into the byte-wise minimum of all four; a zero byte there means a NUL
   is present.  xmm7 is then reused to keep an intact copy of the last
   block for the next iteration.  */
917 | L(Shl5LoopStart): |
918 | movaps 11(%ecx), %xmm2 |
919 | movaps 27(%ecx), %xmm3 |
920 | movaps %xmm3, %xmm6 |
921 | movaps 43(%ecx), %xmm4 |
922 | movaps %xmm4, %xmm7 |
923 | movaps 59(%ecx), %xmm5 |
924 | pminub %xmm2, %xmm6 |
925 | pminub %xmm5, %xmm7 |
926 | pminub %xmm6, %xmm7 |
927 | pcmpeqb %xmm0, %xmm7 |
928 | pmovmskb %xmm7, %eax |
929 | movaps %xmm5, %xmm7 |
930 | palignr $5, %xmm4, %xmm5 |
931 | test %eax, %eax |
932 | palignr $5, %xmm3, %xmm4 |
/* NUL somewhere in these 64 bytes: nothing has been stored yet; redo the
   group 16 bytes at a time (xmm1 and xmm2 are still unmodified).  */
933 | jnz L(Shl5Start) |
934 | # ifdef USE_AS_STRNCPY |
935 | sub $64, %ebx |
936 | jbe L(StrncpyLeave5) |
937 | # endif |
938 | palignr $5, %xmm2, %xmm3 |
939 | lea 64(%ecx), %ecx |
940 | palignr $5, %xmm1, %xmm2 |
941 | movaps %xmm7, %xmm1 |
942 | movaps %xmm5, 48(%edx) |
943 | movaps %xmm4, 32(%edx) |
944 | movaps %xmm3, 16(%edx) |
945 | movaps %xmm2, (%edx) |
946 | lea 64(%edx), %edx |
947 | jmp L(Shl5LoopStart) |
948 | |
/* The aligned block starting at ecx+11 contains a NUL (mask in eax).
   Copy the 11 bytes in front of it (8 bytes at offset 0 plus an
   overlapping 4 bytes at offset 7, using esi as scratch) and continue at
   L(CopyFrom1To16Bytes), defined outside this part of the file, with
   esi = 11, the offset of that block.  */
949 | L(Shl5LoopExit): |
950 | movlpd (%ecx), %xmm0 |
951 | movl 7(%ecx), %esi |
952 | movlpd %xmm0, (%edx) |
953 | movl %esi, 7(%edx) |
954 | mov $11, %esi |
955 | jmp L(CopyFrom1To16Bytes) |
956 | |
/* Source is 6 bytes past a 16-byte boundary (ecx & 15 == 6); edx is
   16-byte aligned.  All movaps loads therefore use the aligned addresses
   ecx-6, ecx+10, ecx+26, ...  Each 16-byte destination block is built
   with palignr $6 from two neighbouring aligned source blocks.
   xmm0 is the all-zero NUL comparand; it is all-zero again whenever a
   jnz on its mask falls through.  In the strncpy build ebx is the length
   counter; the L(StrncpyExit6...) / L(StrncpyLeave6) targets are defined
   outside this part of the file.  */
957 | .p2align 4 |
958 | L(Shl6): |
/* xmm1 = aligned block containing ecx, xmm2 = the block after it.  */
959 | movaps -6(%ecx), %xmm1 |
960 | movaps 10(%ecx), %xmm2 |
/* Also re-entered from the 64-byte loop once a NUL has been seen, with
   xmm1/xmm2 set up the same way.  */
961 | L(Shl6Start): |
/* Check xmm2 for a NUL; xmm3 keeps an intact copy because palignr
   overwrites xmm2.  */
962 | pcmpeqb %xmm2, %xmm0 |
963 | pmovmskb %xmm0, %eax |
964 | movaps %xmm2, %xmm3 |
965 | # ifdef USE_AS_STRNCPY |
966 | sub $16, %ebx |
967 | jbe L(StrncpyExit6Case2OrCase3) |
968 | # endif |
969 | test %eax, %eax |
970 | jnz L(Shl6LoopExit) |
971 | |
/* xmm2 = bytes 6..15 of xmm1 followed by bytes 0..5 of xmm2, i.e. the 16
   source bytes starting at ecx.  Store them and load the next block.  */
972 | palignr $6, %xmm1, %xmm2 |
973 | movaps %xmm3, %xmm1 |
974 | movaps %xmm2, (%edx) |
975 | movaps 26(%ecx), %xmm2 |
976 | |
977 | pcmpeqb %xmm2, %xmm0 |
978 | lea 16(%edx), %edx |
979 | pmovmskb %xmm0, %eax |
980 | lea 16(%ecx), %ecx |
981 | movaps %xmm2, %xmm3 |
982 | # ifdef USE_AS_STRNCPY |
983 | sub $16, %ebx |
984 | jbe L(StrncpyExit6Case2OrCase3) |
985 | # endif |
986 | test %eax, %eax |
987 | jnz L(Shl6LoopExit) |
988 | |
989 | palignr $6, %xmm1, %xmm2 |
990 | movaps %xmm2, (%edx) |
991 | movaps 26(%ecx), %xmm2 |
992 | movaps %xmm3, %xmm1 |
993 | |
994 | pcmpeqb %xmm2, %xmm0 |
995 | lea 16(%edx), %edx |
996 | pmovmskb %xmm0, %eax |
997 | lea 16(%ecx), %ecx |
998 | movaps %xmm2, %xmm3 |
999 | # ifdef USE_AS_STRNCPY |
1000 | sub $16, %ebx |
1001 | jbe L(StrncpyExit6Case2OrCase3) |
1002 | # endif |
1003 | test %eax, %eax |
1004 | jnz L(Shl6LoopExit) |
1005 | |
/* From here the previous block stays in xmm3 (xmm1 is not refreshed).  */
1006 | palignr $6, %xmm1, %xmm2 |
1007 | movaps %xmm2, (%edx) |
1008 | movaps 26(%ecx), %xmm2 |
1009 | |
1010 | pcmpeqb %xmm2, %xmm0 |
1011 | lea 16(%edx), %edx |
1012 | pmovmskb %xmm0, %eax |
1013 | lea 16(%ecx), %ecx |
1014 | # ifdef USE_AS_STRNCPY |
1015 | sub $16, %ebx |
1016 | jbe L(StrncpyExit6Case2OrCase3) |
1017 | # endif |
1018 | test %eax, %eax |
1019 | jnz L(Shl6LoopExit) |
1020 | |
/* Store the fourth block, then set up the 64-byte loop: ecx is moved to
   the next aligned source block not yet loaded and rounded down to a
   64-byte boundary; eax (0, 16, 32 or 48) is the distance rewound and
   edx is moved back by the same amount, so those bytes are copied again.
   ecx is then biased by -10 so the 10/26/42/58 offsets below address
   aligned blocks.  */
1021 | palignr $6, %xmm3, %xmm2 |
1022 | movaps %xmm2, (%edx) |
1023 | lea 26(%ecx), %ecx |
1024 | lea 16(%edx), %edx |
1025 | |
1026 | mov %ecx, %eax |
1027 | and $-0x40, %ecx |
1028 | sub %ecx, %eax |
1029 | lea -10(%ecx), %ecx |
1030 | sub %eax, %edx |
1031 | # ifdef USE_AS_STRNCPY |
/* strncpy: give the rewound bytes back to the counter.  */
1032 | add %eax, %ebx |
1033 | # endif |
1034 | movaps -6(%ecx), %xmm1 |
1035 | |
/* 64 bytes per iteration.  xmm2..xmm5 = four consecutive aligned source
   blocks, xmm1 = the block before them.  xmm6/xmm7 are folded with pminub
   into the byte-wise minimum of all four; a zero byte there means a NUL
   is present.  xmm7 is then reused to keep an intact copy of the last
   block for the next iteration.  */
1036 | L(Shl6LoopStart): |
1037 | movaps 10(%ecx), %xmm2 |
1038 | movaps 26(%ecx), %xmm3 |
1039 | movaps %xmm3, %xmm6 |
1040 | movaps 42(%ecx), %xmm4 |
1041 | movaps %xmm4, %xmm7 |
1042 | movaps 58(%ecx), %xmm5 |
1043 | pminub %xmm2, %xmm6 |
1044 | pminub %xmm5, %xmm7 |
1045 | pminub %xmm6, %xmm7 |
1046 | pcmpeqb %xmm0, %xmm7 |
1047 | pmovmskb %xmm7, %eax |
1048 | movaps %xmm5, %xmm7 |
1049 | palignr $6, %xmm4, %xmm5 |
1050 | test %eax, %eax |
1051 | palignr $6, %xmm3, %xmm4 |
/* NUL somewhere in these 64 bytes: nothing has been stored yet; redo the
   group 16 bytes at a time (xmm1 and xmm2 are still unmodified).  */
1052 | jnz L(Shl6Start) |
1053 | # ifdef USE_AS_STRNCPY |
1054 | sub $64, %ebx |
1055 | jbe L(StrncpyLeave6) |
1056 | # endif |
1057 | palignr $6, %xmm2, %xmm3 |
1058 | lea 64(%ecx), %ecx |
1059 | palignr $6, %xmm1, %xmm2 |
1060 | movaps %xmm7, %xmm1 |
1061 | movaps %xmm5, 48(%edx) |
1062 | movaps %xmm4, 32(%edx) |
1063 | movaps %xmm3, 16(%edx) |
1064 | movaps %xmm2, (%edx) |
1065 | lea 64(%edx), %edx |
1066 | jmp L(Shl6LoopStart) |
1067 | |
/* The aligned block starting at ecx+10 contains a NUL (mask in eax).
   Copy the 10 bytes in front of it (8 bytes at offset 0 plus an
   overlapping 4 bytes at offset 6, using esi as scratch) and continue at
   L(CopyFrom1To16Bytes), defined outside this part of the file, with
   esi = 10, the offset of that block.  */
1068 | L(Shl6LoopExit): |
1069 | movlpd (%ecx), %xmm0 |
1070 | movl 6(%ecx), %esi |
1071 | movlpd %xmm0, (%edx) |
1072 | movl %esi, 6(%edx) |
1073 | mov $10, %esi |
1074 | jmp L(CopyFrom1To16Bytes) |
1075 | |
/* Source is 7 bytes past a 16-byte boundary (ecx & 15 == 7); edx is
   16-byte aligned.  All movaps loads therefore use the aligned addresses
   ecx-7, ecx+9, ecx+25, ...  Each 16-byte destination block is built
   with palignr $7 from two neighbouring aligned source blocks.
   xmm0 is the all-zero NUL comparand; it is all-zero again whenever a
   jnz on its mask falls through.  In the strncpy build ebx is the length
   counter; the L(StrncpyExit7...) / L(StrncpyLeave7) targets are defined
   outside this part of the file.  */
1076 | .p2align 4 |
1077 | L(Shl7): |
/* xmm1 = aligned block containing ecx, xmm2 = the block after it.  */
1078 | movaps -7(%ecx), %xmm1 |
1079 | movaps 9(%ecx), %xmm2 |
/* Also re-entered from the 64-byte loop once a NUL has been seen, with
   xmm1/xmm2 set up the same way.  */
1080 | L(Shl7Start): |
/* Check xmm2 for a NUL; xmm3 keeps an intact copy because palignr
   overwrites xmm2.  */
1081 | pcmpeqb %xmm2, %xmm0 |
1082 | pmovmskb %xmm0, %eax |
1083 | movaps %xmm2, %xmm3 |
1084 | # ifdef USE_AS_STRNCPY |
1085 | sub $16, %ebx |
1086 | jbe L(StrncpyExit7Case2OrCase3) |
1087 | # endif |
1088 | test %eax, %eax |
1089 | jnz L(Shl7LoopExit) |
1090 | |
/* xmm2 = bytes 7..15 of xmm1 followed by bytes 0..6 of xmm2, i.e. the 16
   source bytes starting at ecx.  Store them and load the next block.  */
1091 | palignr $7, %xmm1, %xmm2 |
1092 | movaps %xmm3, %xmm1 |
1093 | movaps %xmm2, (%edx) |
1094 | movaps 25(%ecx), %xmm2 |
1095 | |
1096 | pcmpeqb %xmm2, %xmm0 |
1097 | lea 16(%edx), %edx |
1098 | pmovmskb %xmm0, %eax |
1099 | lea 16(%ecx), %ecx |
1100 | movaps %xmm2, %xmm3 |
1101 | # ifdef USE_AS_STRNCPY |
1102 | sub $16, %ebx |
1103 | jbe L(StrncpyExit7Case2OrCase3) |
1104 | # endif |
1105 | test %eax, %eax |
1106 | jnz L(Shl7LoopExit) |
1107 | |
1108 | palignr $7, %xmm1, %xmm2 |
1109 | movaps %xmm2, (%edx) |
1110 | movaps 25(%ecx), %xmm2 |
1111 | movaps %xmm3, %xmm1 |
1112 | |
1113 | pcmpeqb %xmm2, %xmm0 |
1114 | lea 16(%edx), %edx |
1115 | pmovmskb %xmm0, %eax |
1116 | lea 16(%ecx), %ecx |
1117 | movaps %xmm2, %xmm3 |
1118 | # ifdef USE_AS_STRNCPY |
1119 | sub $16, %ebx |
1120 | jbe L(StrncpyExit7Case2OrCase3) |
1121 | # endif |
1122 | test %eax, %eax |
1123 | jnz L(Shl7LoopExit) |
1124 | |
/* From here the previous block stays in xmm3 (xmm1 is not refreshed).  */
1125 | palignr $7, %xmm1, %xmm2 |
1126 | movaps %xmm2, (%edx) |
1127 | movaps 25(%ecx), %xmm2 |
1128 | |
1129 | pcmpeqb %xmm2, %xmm0 |
1130 | lea 16(%edx), %edx |
1131 | pmovmskb %xmm0, %eax |
1132 | lea 16(%ecx), %ecx |
1133 | # ifdef USE_AS_STRNCPY |
1134 | sub $16, %ebx |
1135 | jbe L(StrncpyExit7Case2OrCase3) |
1136 | # endif |
1137 | test %eax, %eax |
1138 | jnz L(Shl7LoopExit) |
1139 | |
/* Store the fourth block, then set up the 64-byte loop: ecx is moved to
   the next aligned source block not yet loaded and rounded down to a
   64-byte boundary; eax (0, 16, 32 or 48) is the distance rewound and
   edx is moved back by the same amount, so those bytes are copied again.
   ecx is then biased by -9 so the 9/25/41/57 offsets below address
   aligned blocks.  */
1140 | palignr $7, %xmm3, %xmm2 |
1141 | movaps %xmm2, (%edx) |
1142 | lea 25(%ecx), %ecx |
1143 | lea 16(%edx), %edx |
1144 | |
1145 | mov %ecx, %eax |
1146 | and $-0x40, %ecx |
1147 | sub %ecx, %eax |
1148 | lea -9(%ecx), %ecx |
1149 | sub %eax, %edx |
1150 | # ifdef USE_AS_STRNCPY |
/* strncpy: give the rewound bytes back to the counter.  */
1151 | add %eax, %ebx |
1152 | # endif |
1153 | movaps -7(%ecx), %xmm1 |
1154 | |
/* 64 bytes per iteration.  xmm2..xmm5 = four consecutive aligned source
   blocks, xmm1 = the block before them.  xmm6/xmm7 are folded with pminub
   into the byte-wise minimum of all four; a zero byte there means a NUL
   is present.  xmm7 is then reused to keep an intact copy of the last
   block for the next iteration.  */
1155 | L(Shl7LoopStart): |
1156 | movaps 9(%ecx), %xmm2 |
1157 | movaps 25(%ecx), %xmm3 |
1158 | movaps %xmm3, %xmm6 |
1159 | movaps 41(%ecx), %xmm4 |
1160 | movaps %xmm4, %xmm7 |
1161 | movaps 57(%ecx), %xmm5 |
1162 | pminub %xmm2, %xmm6 |
1163 | pminub %xmm5, %xmm7 |
1164 | pminub %xmm6, %xmm7 |
1165 | pcmpeqb %xmm0, %xmm7 |
1166 | pmovmskb %xmm7, %eax |
1167 | movaps %xmm5, %xmm7 |
1168 | palignr $7, %xmm4, %xmm5 |
1169 | test %eax, %eax |
1170 | palignr $7, %xmm3, %xmm4 |
/* NUL somewhere in these 64 bytes: nothing has been stored yet; redo the
   group 16 bytes at a time (xmm1 and xmm2 are still unmodified).  */
1171 | jnz L(Shl7Start) |
1172 | # ifdef USE_AS_STRNCPY |
1173 | sub $64, %ebx |
1174 | jbe L(StrncpyLeave7) |
1175 | # endif |
1176 | palignr $7, %xmm2, %xmm3 |
1177 | lea 64(%ecx), %ecx |
1178 | palignr $7, %xmm1, %xmm2 |
1179 | movaps %xmm7, %xmm1 |
1180 | movaps %xmm5, 48(%edx) |
1181 | movaps %xmm4, 32(%edx) |
1182 | movaps %xmm3, 16(%edx) |
1183 | movaps %xmm2, (%edx) |
1184 | lea 64(%edx), %edx |
1185 | jmp L(Shl7LoopStart) |
1186 | |
/* The aligned block starting at ecx+9 contains a NUL (mask in eax).
   Copy the 9 bytes in front of it (8 bytes at offset 0 plus an
   overlapping 4 bytes at offset 5, using esi as scratch) and continue at
   L(CopyFrom1To16Bytes), defined outside this part of the file, with
   esi = 9, the offset of that block.  */
1187 | L(Shl7LoopExit): |
1188 | movlpd (%ecx), %xmm0 |
1189 | movl 5(%ecx), %esi |
1190 | movlpd %xmm0, (%edx) |
1191 | movl %esi, 5(%edx) |
1192 | mov $9, %esi |
1193 | jmp L(CopyFrom1To16Bytes) |
1194 | |
/* L(Shl8): copy path that merges neighbouring source vectors with an
   8-byte palignr.  Source data is read with 16-byte loads at -8(%ecx),
   8(%ecx), 24(%ecx); each merged result is stored to (%edx).  Loads and
   stores use movaps, which requires 16-byte aligned addresses.
   NOTE(review): %xmm0 is presumably all-zero on entry (set up before this
   chunk), so each pcmpeqb/pmovmskb pair leaves a mask of zero bytes in
   %eax -- confirm.  Under USE_AS_STRNCPY %ebx is reduced by 16 per step;
   it looks like the remaining byte budget -- confirm against the caller.  */
1195 | .p2align 4 |
1196 | L(Shl8): |
1197 | movaps -8(%ecx), %xmm1 |
1198 | movaps 8(%ecx), %xmm2 |
1199 | L(Shl8Start): |
/* Step 1 of 4: test the newest vector, keep an unshifted copy in %xmm3.  */
1200 | pcmpeqb %xmm2, %xmm0 |
1201 | pmovmskb %xmm0, %eax |
1202 | movaps %xmm2, %xmm3 |
1203 | # ifdef USE_AS_STRNCPY |
1204 | sub $16, %ebx |
1205 | jbe L(StrncpyExit8Case2OrCase3) |
1206 | # endif |
1207 | test %eax, %eax |
1208 | jnz L(Shl8LoopExit) |
1209 | |
/* palignr $8: upper 8 bytes of %xmm1 followed by lower 8 bytes of %xmm2
   form the 16 bytes written to (%edx).  */
1210 | palignr $8, %xmm1, %xmm2 |
1211 | movaps %xmm3, %xmm1 |
1212 | movaps %xmm2, (%edx) |
1213 | movaps 24(%ecx), %xmm2 |
1214 | |
/* Step 2 of 4.  */
1215 | pcmpeqb %xmm2, %xmm0 |
1216 | lea 16(%edx), %edx |
1217 | pmovmskb %xmm0, %eax |
1218 | lea 16(%ecx), %ecx |
1219 | movaps %xmm2, %xmm3 |
1220 | # ifdef USE_AS_STRNCPY |
1221 | sub $16, %ebx |
1222 | jbe L(StrncpyExit8Case2OrCase3) |
1223 | # endif |
1224 | test %eax, %eax |
1225 | jnz L(Shl8LoopExit) |
1226 | |
1227 | palignr $8, %xmm1, %xmm2 |
1228 | movaps %xmm2, (%edx) |
1229 | movaps 24(%ecx), %xmm2 |
1230 | movaps %xmm3, %xmm1 |
1231 | |
/* Step 3 of 4.  */
1232 | pcmpeqb %xmm2, %xmm0 |
1233 | lea 16(%edx), %edx |
1234 | pmovmskb %xmm0, %eax |
1235 | lea 16(%ecx), %ecx |
1236 | movaps %xmm2, %xmm3 |
1237 | # ifdef USE_AS_STRNCPY |
1238 | sub $16, %ebx |
1239 | jbe L(StrncpyExit8Case2OrCase3) |
1240 | # endif |
1241 | test %eax, %eax |
1242 | jnz L(Shl8LoopExit) |
1243 | |
1244 | palignr $8, %xmm1, %xmm2 |
1245 | movaps %xmm2, (%edx) |
1246 | movaps 24(%ecx), %xmm2 |
1247 | |
/* Step 4 of 4: no copy into %xmm3 this time, so %xmm3 still holds the
   previous unshifted vector and is used as the palignr source below.  */
1248 | pcmpeqb %xmm2, %xmm0 |
1249 | lea 16(%edx), %edx |
1250 | pmovmskb %xmm0, %eax |
1251 | lea 16(%ecx), %ecx |
1252 | # ifdef USE_AS_STRNCPY |
1253 | sub $16, %ebx |
1254 | jbe L(StrncpyExit8Case2OrCase3) |
1255 | # endif |
1256 | test %eax, %eax |
1257 | jnz L(Shl8LoopExit) |
1258 | |
1259 | palignr $8, %xmm3, %xmm2 |
1260 | movaps %xmm2, (%edx) |
1261 | lea 24(%ecx), %ecx |
1262 | lea 16(%edx), %edx |
1263 | |
/* Round the source position down to a 64-byte boundary for the loop:
   %eax = %ecx & 63 is taken back off %edx (and, for strncpy, added back
   to %ebx), then %ecx is biased by -8 so that 8(%ecx) is 64-byte aligned.
   %xmm1 is reloaded with the vector preceding that boundary.  */
1264 | mov %ecx, %eax |
1265 | and $-0x40, %ecx |
1266 | sub %ecx, %eax |
1267 | lea -8(%ecx), %ecx |
1268 | sub %eax, %edx |
1269 | # ifdef USE_AS_STRNCPY |
1270 | add %eax, %ebx |
1271 | # endif |
1272 | movaps -8(%ecx), %xmm1 |
1273 | |
/* L(Shl8LoopStart): main loop of the 8-byte-shift path, 64 source bytes
   per iteration.  Four aligned vectors are loaded, reduced with a bytewise
   unsigned minimum and compared against %xmm0, so %eax is non-zero when
   any of the 64 bytes matches %xmm0 (a zero byte, if %xmm0 is all-zero as
   presumed -- TODO confirm).  On a hit control goes back to L(Shl8Start);
   %xmm1 and %xmm2 are still unshifted at that point.  palignr does not
   modify EFLAGS, so the jnz still sees the result of the test.  */
1274 | L(Shl8LoopStart): |
1275 | movaps 8(%ecx), %xmm2 |
1276 | movaps 24(%ecx), %xmm3 |
1277 | movaps %xmm3, %xmm6 |
1278 | movaps 40(%ecx), %xmm4 |
1279 | movaps %xmm4, %xmm7 |
1280 | movaps 56(%ecx), %xmm5 |
1281 | pminub %xmm2, %xmm6 |
1282 | pminub %xmm5, %xmm7 |
1283 | pminub %xmm6, %xmm7 |
1284 | pcmpeqb %xmm0, %xmm7 |
1285 | pmovmskb %xmm7, %eax |
/* %xmm7 keeps the unshifted last vector; it becomes %xmm1 for the next
   iteration.  */
1286 | movaps %xmm5, %xmm7 |
1287 | palignr $8, %xmm4, %xmm5 |
1288 | test %eax, %eax |
1289 | palignr $8, %xmm3, %xmm4 |
1290 | jnz L(Shl8Start) |
1291 | # ifdef USE_AS_STRNCPY |
1292 | sub $64, %ebx |
1293 | jbe L(StrncpyLeave8) |
1294 | # endif |
/* No hit: finish the remaining merges and store all 64 bytes.  */
1295 | palignr $8, %xmm2, %xmm3 |
1296 | lea 64(%ecx), %ecx |
1297 | palignr $8, %xmm1, %xmm2 |
1298 | movaps %xmm7, %xmm1 |
1299 | movaps %xmm5, 48(%edx) |
1300 | movaps %xmm4, 32(%edx) |
1301 | movaps %xmm3, 16(%edx) |
1302 | movaps %xmm2, (%edx) |
1303 | lea 64(%edx), %edx |
1304 | jmp L(Shl8LoopStart) |
1305 | |
/* L(Shl8LoopExit): copy the 8 bytes at (%ecx) to (%edx), then jump to
   L(CopyFrom1To16Bytes) with %esi = 8; that code adds %esi to both
   pointers before dispatching on the mask in %eax.  */
1306 | L(Shl8LoopExit): |
1307 | movlpd (%ecx), %xmm0 |
1308 | movlpd %xmm0, (%edx) |
1309 | mov $8, %esi |
1310 | jmp L(CopyFrom1To16Bytes) |
1311 | |
/* L(Shl9): copy path that merges neighbouring source vectors with a
   9-byte palignr.  Source data is read with 16-byte loads at -9(%ecx),
   7(%ecx), 23(%ecx); each merged result is stored to (%edx).  Loads and
   stores use movaps, which requires 16-byte aligned addresses.
   NOTE(review): %xmm0 is presumably all-zero on entry (set up before this
   chunk), so each pcmpeqb/pmovmskb pair leaves a mask of zero bytes in
   %eax -- confirm.  Under USE_AS_STRNCPY %ebx is reduced by 16 per step;
   it looks like the remaining byte budget -- confirm against the caller.  */
1312 | .p2align 4 |
1313 | L(Shl9): |
1314 | movaps -9(%ecx), %xmm1 |
1315 | movaps 7(%ecx), %xmm2 |
1316 | L(Shl9Start): |
/* Step 1 of 4: test the newest vector, keep an unshifted copy in %xmm3.  */
1317 | pcmpeqb %xmm2, %xmm0 |
1318 | pmovmskb %xmm0, %eax |
1319 | movaps %xmm2, %xmm3 |
1320 | # ifdef USE_AS_STRNCPY |
1321 | sub $16, %ebx |
1322 | jbe L(StrncpyExit9Case2OrCase3) |
1323 | # endif |
1324 | test %eax, %eax |
1325 | jnz L(Shl9LoopExit) |
1326 | |
/* palignr $9: upper 7 bytes of %xmm1 followed by lower 9 bytes of %xmm2
   form the 16 bytes written to (%edx).  */
1327 | palignr $9, %xmm1, %xmm2 |
1328 | movaps %xmm3, %xmm1 |
1329 | movaps %xmm2, (%edx) |
1330 | movaps 23(%ecx), %xmm2 |
1331 | |
/* Step 2 of 4.  */
1332 | pcmpeqb %xmm2, %xmm0 |
1333 | lea 16(%edx), %edx |
1334 | pmovmskb %xmm0, %eax |
1335 | lea 16(%ecx), %ecx |
1336 | movaps %xmm2, %xmm3 |
1337 | # ifdef USE_AS_STRNCPY |
1338 | sub $16, %ebx |
1339 | jbe L(StrncpyExit9Case2OrCase3) |
1340 | # endif |
1341 | test %eax, %eax |
1342 | jnz L(Shl9LoopExit) |
1343 | |
1344 | palignr $9, %xmm1, %xmm2 |
1345 | movaps %xmm2, (%edx) |
1346 | movaps 23(%ecx), %xmm2 |
1347 | movaps %xmm3, %xmm1 |
1348 | |
/* Step 3 of 4.  */
1349 | pcmpeqb %xmm2, %xmm0 |
1350 | lea 16(%edx), %edx |
1351 | pmovmskb %xmm0, %eax |
1352 | lea 16(%ecx), %ecx |
1353 | movaps %xmm2, %xmm3 |
1354 | # ifdef USE_AS_STRNCPY |
1355 | sub $16, %ebx |
1356 | jbe L(StrncpyExit9Case2OrCase3) |
1357 | # endif |
1358 | test %eax, %eax |
1359 | jnz L(Shl9LoopExit) |
1360 | |
1361 | palignr $9, %xmm1, %xmm2 |
1362 | movaps %xmm2, (%edx) |
1363 | movaps 23(%ecx), %xmm2 |
1364 | |
/* Step 4 of 4: no copy into %xmm3 this time, so %xmm3 still holds the
   previous unshifted vector and is used as the palignr source below.  */
1365 | pcmpeqb %xmm2, %xmm0 |
1366 | lea 16(%edx), %edx |
1367 | pmovmskb %xmm0, %eax |
1368 | lea 16(%ecx), %ecx |
1369 | # ifdef USE_AS_STRNCPY |
1370 | sub $16, %ebx |
1371 | jbe L(StrncpyExit9Case2OrCase3) |
1372 | # endif |
1373 | test %eax, %eax |
1374 | jnz L(Shl9LoopExit) |
1375 | |
1376 | palignr $9, %xmm3, %xmm2 |
1377 | movaps %xmm2, (%edx) |
1378 | lea 23(%ecx), %ecx |
1379 | lea 16(%edx), %edx |
1380 | |
/* Round the source position down to a 64-byte boundary for the loop:
   %eax = %ecx & 63 is taken back off %edx (and, for strncpy, added back
   to %ebx), then %ecx is biased by -7 so that 7(%ecx) is 64-byte aligned.
   %xmm1 is reloaded with the vector preceding that boundary.  */
1381 | mov %ecx, %eax |
1382 | and $-0x40, %ecx |
1383 | sub %ecx, %eax |
1384 | lea -7(%ecx), %ecx |
1385 | sub %eax, %edx |
1386 | # ifdef USE_AS_STRNCPY |
1387 | add %eax, %ebx |
1388 | # endif |
1389 | movaps -9(%ecx), %xmm1 |
1390 | |
/* L(Shl9LoopStart): main loop of the 9-byte-shift path, 64 source bytes
   per iteration.  Four aligned vectors are loaded, reduced with a bytewise
   unsigned minimum and compared against %xmm0, so %eax is non-zero when
   any of the 64 bytes matches %xmm0 (a zero byte, if %xmm0 is all-zero as
   presumed -- TODO confirm).  On a hit control goes back to L(Shl9Start);
   %xmm1 and %xmm2 are still unshifted at that point.  palignr does not
   modify EFLAGS, so the jnz still sees the result of the test.  */
1391 | L(Shl9LoopStart): |
1392 | movaps 7(%ecx), %xmm2 |
1393 | movaps 23(%ecx), %xmm3 |
1394 | movaps %xmm3, %xmm6 |
1395 | movaps 39(%ecx), %xmm4 |
1396 | movaps %xmm4, %xmm7 |
1397 | movaps 55(%ecx), %xmm5 |
1398 | pminub %xmm2, %xmm6 |
1399 | pminub %xmm5, %xmm7 |
1400 | pminub %xmm6, %xmm7 |
1401 | pcmpeqb %xmm0, %xmm7 |
1402 | pmovmskb %xmm7, %eax |
/* %xmm7 keeps the unshifted last vector; it becomes %xmm1 for the next
   iteration.  */
1403 | movaps %xmm5, %xmm7 |
1404 | palignr $9, %xmm4, %xmm5 |
1405 | test %eax, %eax |
1406 | palignr $9, %xmm3, %xmm4 |
1407 | jnz L(Shl9Start) |
1408 | # ifdef USE_AS_STRNCPY |
1409 | sub $64, %ebx |
1410 | jbe L(StrncpyLeave9) |
1411 | # endif |
/* No hit: finish the remaining merges and store all 64 bytes.  */
1412 | palignr $9, %xmm2, %xmm3 |
1413 | lea 64(%ecx), %ecx |
1414 | palignr $9, %xmm1, %xmm2 |
1415 | movaps %xmm7, %xmm1 |
1416 | movaps %xmm5, 48(%edx) |
1417 | movaps %xmm4, 32(%edx) |
1418 | movaps %xmm3, 16(%edx) |
1419 | movaps %xmm2, (%edx) |
1420 | lea 64(%edx), %edx |
1421 | jmp L(Shl9LoopStart) |
1422 | |
/* L(Shl9LoopExit): copy the 8 bytes at -1(%ecx) to -1(%edx) (offsets
   -1..6), then jump to L(CopyFrom1To16Bytes) with %esi = 7; that code
   adds %esi to both pointers before dispatching on the mask in %eax.  */
1423 | L(Shl9LoopExit): |
1424 | movlpd -1(%ecx), %xmm0 |
1425 | movlpd %xmm0, -1(%edx) |
1426 | mov $7, %esi |
1427 | jmp L(CopyFrom1To16Bytes) |
1428 | |
/* L(Shl10): copy path that merges neighbouring source vectors with a
   10-byte palignr.  Source data is read with 16-byte loads at -10(%ecx),
   6(%ecx), 22(%ecx); each merged result is stored to (%edx).  Loads and
   stores use movaps, which requires 16-byte aligned addresses.
   NOTE(review): %xmm0 is presumably all-zero on entry (set up before this
   chunk), so each pcmpeqb/pmovmskb pair leaves a mask of zero bytes in
   %eax -- confirm.  Under USE_AS_STRNCPY %ebx is reduced by 16 per step;
   it looks like the remaining byte budget -- confirm against the caller.  */
1429 | .p2align 4 |
1430 | L(Shl10): |
1431 | movaps -10(%ecx), %xmm1 |
1432 | movaps 6(%ecx), %xmm2 |
1433 | L(Shl10Start): |
/* Step 1 of 4: test the newest vector, keep an unshifted copy in %xmm3.  */
1434 | pcmpeqb %xmm2, %xmm0 |
1435 | pmovmskb %xmm0, %eax |
1436 | movaps %xmm2, %xmm3 |
1437 | # ifdef USE_AS_STRNCPY |
1438 | sub $16, %ebx |
1439 | jbe L(StrncpyExit10Case2OrCase3) |
1440 | # endif |
1441 | test %eax, %eax |
1442 | jnz L(Shl10LoopExit) |
1443 | |
/* palignr $10: upper 6 bytes of %xmm1 followed by lower 10 bytes of
   %xmm2 form the 16 bytes written to (%edx).  */
1444 | palignr $10, %xmm1, %xmm2 |
1445 | movaps %xmm3, %xmm1 |
1446 | movaps %xmm2, (%edx) |
1447 | movaps 22(%ecx), %xmm2 |
1448 | |
/* Step 2 of 4.  */
1449 | pcmpeqb %xmm2, %xmm0 |
1450 | lea 16(%edx), %edx |
1451 | pmovmskb %xmm0, %eax |
1452 | lea 16(%ecx), %ecx |
1453 | movaps %xmm2, %xmm3 |
1454 | # ifdef USE_AS_STRNCPY |
1455 | sub $16, %ebx |
1456 | jbe L(StrncpyExit10Case2OrCase3) |
1457 | # endif |
1458 | test %eax, %eax |
1459 | jnz L(Shl10LoopExit) |
1460 | |
1461 | palignr $10, %xmm1, %xmm2 |
1462 | movaps %xmm2, (%edx) |
1463 | movaps 22(%ecx), %xmm2 |
1464 | movaps %xmm3, %xmm1 |
1465 | |
/* Step 3 of 4.  */
1466 | pcmpeqb %xmm2, %xmm0 |
1467 | lea 16(%edx), %edx |
1468 | pmovmskb %xmm0, %eax |
1469 | lea 16(%ecx), %ecx |
1470 | movaps %xmm2, %xmm3 |
1471 | # ifdef USE_AS_STRNCPY |
1472 | sub $16, %ebx |
1473 | jbe L(StrncpyExit10Case2OrCase3) |
1474 | # endif |
1475 | test %eax, %eax |
1476 | jnz L(Shl10LoopExit) |
1477 | |
1478 | palignr $10, %xmm1, %xmm2 |
1479 | movaps %xmm2, (%edx) |
1480 | movaps 22(%ecx), %xmm2 |
1481 | |
/* Step 4 of 4: no copy into %xmm3 this time, so %xmm3 still holds the
   previous unshifted vector and is used as the palignr source below.  */
1482 | pcmpeqb %xmm2, %xmm0 |
1483 | lea 16(%edx), %edx |
1484 | pmovmskb %xmm0, %eax |
1485 | lea 16(%ecx), %ecx |
1486 | # ifdef USE_AS_STRNCPY |
1487 | sub $16, %ebx |
1488 | jbe L(StrncpyExit10Case2OrCase3) |
1489 | # endif |
1490 | test %eax, %eax |
1491 | jnz L(Shl10LoopExit) |
1492 | |
1493 | palignr $10, %xmm3, %xmm2 |
1494 | movaps %xmm2, (%edx) |
1495 | lea 22(%ecx), %ecx |
1496 | lea 16(%edx), %edx |
1497 | |
/* Round the source position down to a 64-byte boundary for the loop:
   %eax = %ecx & 63 is taken back off %edx (and, for strncpy, added back
   to %ebx), then %ecx is biased by -6 so that 6(%ecx) is 64-byte aligned.
   %xmm1 is reloaded with the vector preceding that boundary.  */
1498 | mov %ecx, %eax |
1499 | and $-0x40, %ecx |
1500 | sub %ecx, %eax |
1501 | lea -6(%ecx), %ecx |
1502 | sub %eax, %edx |
1503 | # ifdef USE_AS_STRNCPY |
1504 | add %eax, %ebx |
1505 | # endif |
1506 | movaps -10(%ecx), %xmm1 |
1507 | |
/* L(Shl10LoopStart): main loop of the 10-byte-shift path, 64 source bytes
   per iteration.  Four aligned vectors are loaded, reduced with a bytewise
   unsigned minimum and compared against %xmm0, so %eax is non-zero when
   any of the 64 bytes matches %xmm0 (a zero byte, if %xmm0 is all-zero as
   presumed -- TODO confirm).  On a hit control goes back to L(Shl10Start);
   %xmm1 and %xmm2 are still unshifted at that point.  palignr does not
   modify EFLAGS, so the jnz still sees the result of the test.  */
1508 | L(Shl10LoopStart): |
1509 | movaps 6(%ecx), %xmm2 |
1510 | movaps 22(%ecx), %xmm3 |
1511 | movaps %xmm3, %xmm6 |
1512 | movaps 38(%ecx), %xmm4 |
1513 | movaps %xmm4, %xmm7 |
1514 | movaps 54(%ecx), %xmm5 |
1515 | pminub %xmm2, %xmm6 |
1516 | pminub %xmm5, %xmm7 |
1517 | pminub %xmm6, %xmm7 |
1518 | pcmpeqb %xmm0, %xmm7 |
1519 | pmovmskb %xmm7, %eax |
/* %xmm7 keeps the unshifted last vector; it becomes %xmm1 for the next
   iteration.  */
1520 | movaps %xmm5, %xmm7 |
1521 | palignr $10, %xmm4, %xmm5 |
1522 | test %eax, %eax |
1523 | palignr $10, %xmm3, %xmm4 |
1524 | jnz L(Shl10Start) |
1525 | # ifdef USE_AS_STRNCPY |
1526 | sub $64, %ebx |
1527 | jbe L(StrncpyLeave10) |
1528 | # endif |
/* No hit: finish the remaining merges and store all 64 bytes.  */
1529 | palignr $10, %xmm2, %xmm3 |
1530 | lea 64(%ecx), %ecx |
1531 | palignr $10, %xmm1, %xmm2 |
1532 | movaps %xmm7, %xmm1 |
1533 | movaps %xmm5, 48(%edx) |
1534 | movaps %xmm4, 32(%edx) |
1535 | movaps %xmm3, 16(%edx) |
1536 | movaps %xmm2, (%edx) |
1537 | lea 64(%edx), %edx |
1538 | jmp L(Shl10LoopStart) |
1539 | |
/* L(Shl10LoopExit): copy the 8 bytes at -2(%ecx) to -2(%edx) (offsets
   -2..5), then jump to L(CopyFrom1To16Bytes) with %esi = 6; that code
   adds %esi to both pointers before dispatching on the mask in %eax.  */
1540 | L(Shl10LoopExit): |
1541 | movlpd -2(%ecx), %xmm0 |
1542 | movlpd %xmm0, -2(%edx) |
1543 | mov $6, %esi |
1544 | jmp L(CopyFrom1To16Bytes) |
1545 | |
/* L(Shl11): copy path that merges neighbouring source vectors with an
   11-byte palignr.  Source data is read with 16-byte loads at -11(%ecx),
   5(%ecx), 21(%ecx); each merged result is stored to (%edx).  Loads and
   stores use movaps, which requires 16-byte aligned addresses.
   NOTE(review): %xmm0 is presumably all-zero on entry (set up before this
   chunk), so each pcmpeqb/pmovmskb pair leaves a mask of zero bytes in
   %eax -- confirm.  Under USE_AS_STRNCPY %ebx is reduced by 16 per step;
   it looks like the remaining byte budget -- confirm against the caller.  */
1546 | .p2align 4 |
1547 | L(Shl11): |
1548 | movaps -11(%ecx), %xmm1 |
1549 | movaps 5(%ecx), %xmm2 |
1550 | L(Shl11Start): |
/* Step 1 of 4: test the newest vector, keep an unshifted copy in %xmm3.  */
1551 | pcmpeqb %xmm2, %xmm0 |
1552 | pmovmskb %xmm0, %eax |
1553 | movaps %xmm2, %xmm3 |
1554 | # ifdef USE_AS_STRNCPY |
1555 | sub $16, %ebx |
1556 | jbe L(StrncpyExit11Case2OrCase3) |
1557 | # endif |
1558 | test %eax, %eax |
1559 | jnz L(Shl11LoopExit) |
1560 | |
/* palignr $11: upper 5 bytes of %xmm1 followed by lower 11 bytes of
   %xmm2 form the 16 bytes written to (%edx).  */
1561 | palignr $11, %xmm1, %xmm2 |
1562 | movaps %xmm3, %xmm1 |
1563 | movaps %xmm2, (%edx) |
1564 | movaps 21(%ecx), %xmm2 |
1565 | |
/* Step 2 of 4.  */
1566 | pcmpeqb %xmm2, %xmm0 |
1567 | lea 16(%edx), %edx |
1568 | pmovmskb %xmm0, %eax |
1569 | lea 16(%ecx), %ecx |
1570 | movaps %xmm2, %xmm3 |
1571 | # ifdef USE_AS_STRNCPY |
1572 | sub $16, %ebx |
1573 | jbe L(StrncpyExit11Case2OrCase3) |
1574 | # endif |
1575 | test %eax, %eax |
1576 | jnz L(Shl11LoopExit) |
1577 | |
1578 | palignr $11, %xmm1, %xmm2 |
1579 | movaps %xmm2, (%edx) |
1580 | movaps 21(%ecx), %xmm2 |
1581 | movaps %xmm3, %xmm1 |
1582 | |
/* Step 3 of 4.  */
1583 | pcmpeqb %xmm2, %xmm0 |
1584 | lea 16(%edx), %edx |
1585 | pmovmskb %xmm0, %eax |
1586 | lea 16(%ecx), %ecx |
1587 | movaps %xmm2, %xmm3 |
1588 | # ifdef USE_AS_STRNCPY |
1589 | sub $16, %ebx |
1590 | jbe L(StrncpyExit11Case2OrCase3) |
1591 | # endif |
1592 | test %eax, %eax |
1593 | jnz L(Shl11LoopExit) |
1594 | |
1595 | palignr $11, %xmm1, %xmm2 |
1596 | movaps %xmm2, (%edx) |
1597 | movaps 21(%ecx), %xmm2 |
1598 | |
/* Step 4 of 4: no copy into %xmm3 this time, so %xmm3 still holds the
   previous unshifted vector and is used as the palignr source below.  */
1599 | pcmpeqb %xmm2, %xmm0 |
1600 | lea 16(%edx), %edx |
1601 | pmovmskb %xmm0, %eax |
1602 | lea 16(%ecx), %ecx |
1603 | # ifdef USE_AS_STRNCPY |
1604 | sub $16, %ebx |
1605 | jbe L(StrncpyExit11Case2OrCase3) |
1606 | # endif |
1607 | test %eax, %eax |
1608 | jnz L(Shl11LoopExit) |
1609 | |
1610 | palignr $11, %xmm3, %xmm2 |
1611 | movaps %xmm2, (%edx) |
1612 | lea 21(%ecx), %ecx |
1613 | lea 16(%edx), %edx |
1614 | |
/* Round the source position down to a 64-byte boundary for the loop:
   %eax = %ecx & 63 is taken back off %edx (and, for strncpy, added back
   to %ebx), then %ecx is biased by -5 so that 5(%ecx) is 64-byte aligned.
   %xmm1 is reloaded with the vector preceding that boundary.  */
1615 | mov %ecx, %eax |
1616 | and $-0x40, %ecx |
1617 | sub %ecx, %eax |
1618 | lea -5(%ecx), %ecx |
1619 | sub %eax, %edx |
1620 | # ifdef USE_AS_STRNCPY |
1621 | add %eax, %ebx |
1622 | # endif |
1623 | movaps -11(%ecx), %xmm1 |
1624 | |
/* L(Shl11LoopStart): main loop of the 11-byte-shift path, 64 source bytes
   per iteration.  Four aligned vectors are loaded, reduced with a bytewise
   unsigned minimum and compared against %xmm0, so %eax is non-zero when
   any of the 64 bytes matches %xmm0 (a zero byte, if %xmm0 is all-zero as
   presumed -- TODO confirm).  On a hit control goes back to L(Shl11Start);
   %xmm1 and %xmm2 are still unshifted at that point.  palignr does not
   modify EFLAGS, so the jnz still sees the result of the test.  */
1625 | L(Shl11LoopStart): |
1626 | movaps 5(%ecx), %xmm2 |
1627 | movaps 21(%ecx), %xmm3 |
1628 | movaps %xmm3, %xmm6 |
1629 | movaps 37(%ecx), %xmm4 |
1630 | movaps %xmm4, %xmm7 |
1631 | movaps 53(%ecx), %xmm5 |
1632 | pminub %xmm2, %xmm6 |
1633 | pminub %xmm5, %xmm7 |
1634 | pminub %xmm6, %xmm7 |
1635 | pcmpeqb %xmm0, %xmm7 |
1636 | pmovmskb %xmm7, %eax |
/* %xmm7 keeps the unshifted last vector; it becomes %xmm1 for the next
   iteration.  */
1637 | movaps %xmm5, %xmm7 |
1638 | palignr $11, %xmm4, %xmm5 |
1639 | test %eax, %eax |
1640 | palignr $11, %xmm3, %xmm4 |
1641 | jnz L(Shl11Start) |
1642 | # ifdef USE_AS_STRNCPY |
1643 | sub $64, %ebx |
1644 | jbe L(StrncpyLeave11) |
1645 | # endif |
/* No hit: finish the remaining merges and store all 64 bytes.  */
1646 | palignr $11, %xmm2, %xmm3 |
1647 | lea 64(%ecx), %ecx |
1648 | palignr $11, %xmm1, %xmm2 |
1649 | movaps %xmm7, %xmm1 |
1650 | movaps %xmm5, 48(%edx) |
1651 | movaps %xmm4, 32(%edx) |
1652 | movaps %xmm3, 16(%edx) |
1653 | movaps %xmm2, (%edx) |
1654 | lea 64(%edx), %edx |
1655 | jmp L(Shl11LoopStart) |
1656 | |
/* L(Shl11LoopExit): copy the 8 bytes at -3(%ecx) to -3(%edx) (offsets
   -3..4), then jump to L(CopyFrom1To16Bytes) with %esi = 5; that code
   adds %esi to both pointers before dispatching on the mask in %eax.  */
1657 | L(Shl11LoopExit): |
1658 | movlpd -3(%ecx), %xmm0 |
1659 | movlpd %xmm0, -3(%edx) |
1660 | mov $5, %esi |
1661 | jmp L(CopyFrom1To16Bytes) |
1662 | |
/* L(Shl12): copy path that merges neighbouring source vectors with a
   12-byte palignr.  Source data is read with 16-byte loads at -12(%ecx),
   4(%ecx), 20(%ecx); each merged result is stored to (%edx).  Loads and
   stores use movaps, which requires 16-byte aligned addresses.
   NOTE(review): %xmm0 is presumably all-zero on entry (set up before this
   chunk), so each pcmpeqb/pmovmskb pair leaves a mask of zero bytes in
   %eax -- confirm.  Under USE_AS_STRNCPY %ebx is reduced by 16 per step;
   it looks like the remaining byte budget -- confirm against the caller.  */
1663 | .p2align 4 |
1664 | L(Shl12): |
1665 | movaps -12(%ecx), %xmm1 |
1666 | movaps 4(%ecx), %xmm2 |
1667 | L(Shl12Start): |
/* Step 1 of 4: test the newest vector, keep an unshifted copy in %xmm3.  */
1668 | pcmpeqb %xmm2, %xmm0 |
1669 | pmovmskb %xmm0, %eax |
1670 | movaps %xmm2, %xmm3 |
1671 | # ifdef USE_AS_STRNCPY |
1672 | sub $16, %ebx |
1673 | jbe L(StrncpyExit12Case2OrCase3) |
1674 | # endif |
1675 | test %eax, %eax |
1676 | jnz L(Shl12LoopExit) |
1677 | |
/* palignr $12: upper 4 bytes of %xmm1 followed by lower 12 bytes of
   %xmm2 form the 16 bytes written to (%edx).  */
1678 | palignr $12, %xmm1, %xmm2 |
1679 | movaps %xmm3, %xmm1 |
1680 | movaps %xmm2, (%edx) |
1681 | movaps 20(%ecx), %xmm2 |
1682 | |
/* Step 2 of 4.  */
1683 | pcmpeqb %xmm2, %xmm0 |
1684 | lea 16(%edx), %edx |
1685 | pmovmskb %xmm0, %eax |
1686 | lea 16(%ecx), %ecx |
1687 | movaps %xmm2, %xmm3 |
1688 | # ifdef USE_AS_STRNCPY |
1689 | sub $16, %ebx |
1690 | jbe L(StrncpyExit12Case2OrCase3) |
1691 | # endif |
1692 | test %eax, %eax |
1693 | jnz L(Shl12LoopExit) |
1694 | |
1695 | palignr $12, %xmm1, %xmm2 |
1696 | movaps %xmm2, (%edx) |
1697 | movaps 20(%ecx), %xmm2 |
1698 | movaps %xmm3, %xmm1 |
1699 | |
/* Step 3 of 4.  */
1700 | pcmpeqb %xmm2, %xmm0 |
1701 | lea 16(%edx), %edx |
1702 | pmovmskb %xmm0, %eax |
1703 | lea 16(%ecx), %ecx |
1704 | movaps %xmm2, %xmm3 |
1705 | # ifdef USE_AS_STRNCPY |
1706 | sub $16, %ebx |
1707 | jbe L(StrncpyExit12Case2OrCase3) |
1708 | # endif |
1709 | test %eax, %eax |
1710 | jnz L(Shl12LoopExit) |
1711 | |
1712 | palignr $12, %xmm1, %xmm2 |
1713 | movaps %xmm2, (%edx) |
1714 | movaps 20(%ecx), %xmm2 |
1715 | |
/* Step 4 of 4: no copy into %xmm3 this time, so %xmm3 still holds the
   previous unshifted vector and is used as the palignr source below.  */
1716 | pcmpeqb %xmm2, %xmm0 |
1717 | lea 16(%edx), %edx |
1718 | pmovmskb %xmm0, %eax |
1719 | lea 16(%ecx), %ecx |
1720 | # ifdef USE_AS_STRNCPY |
1721 | sub $16, %ebx |
1722 | jbe L(StrncpyExit12Case2OrCase3) |
1723 | # endif |
1724 | test %eax, %eax |
1725 | jnz L(Shl12LoopExit) |
1726 | |
1727 | palignr $12, %xmm3, %xmm2 |
1728 | movaps %xmm2, (%edx) |
1729 | lea 20(%ecx), %ecx |
1730 | lea 16(%edx), %edx |
1731 | |
/* Round the source position down to a 64-byte boundary for the loop:
   %eax = %ecx & 63 is taken back off %edx (and, for strncpy, added back
   to %ebx), then %ecx is biased by -4 so that 4(%ecx) is 64-byte aligned.
   %xmm1 is reloaded with the vector preceding that boundary.  */
1732 | mov %ecx, %eax |
1733 | and $-0x40, %ecx |
1734 | sub %ecx, %eax |
1735 | lea -4(%ecx), %ecx |
1736 | sub %eax, %edx |
1737 | # ifdef USE_AS_STRNCPY |
1738 | add %eax, %ebx |
1739 | # endif |
1740 | movaps -12(%ecx), %xmm1 |
1741 | |
/* L(Shl12LoopStart): main loop of the 12-byte-shift path, 64 source bytes
   per iteration.  Four aligned vectors are loaded, reduced with a bytewise
   unsigned minimum and compared against %xmm0, so %eax is non-zero when
   any of the 64 bytes matches %xmm0 (a zero byte, if %xmm0 is all-zero as
   presumed -- TODO confirm).  On a hit control goes back to L(Shl12Start);
   %xmm1 and %xmm2 are still unshifted at that point.  palignr does not
   modify EFLAGS, so the jnz still sees the result of the test.  */
1742 | L(Shl12LoopStart): |
1743 | movaps 4(%ecx), %xmm2 |
1744 | movaps 20(%ecx), %xmm3 |
1745 | movaps %xmm3, %xmm6 |
1746 | movaps 36(%ecx), %xmm4 |
1747 | movaps %xmm4, %xmm7 |
1748 | movaps 52(%ecx), %xmm5 |
1749 | pminub %xmm2, %xmm6 |
1750 | pminub %xmm5, %xmm7 |
1751 | pminub %xmm6, %xmm7 |
1752 | pcmpeqb %xmm0, %xmm7 |
1753 | pmovmskb %xmm7, %eax |
/* %xmm7 keeps the unshifted last vector; it becomes %xmm1 for the next
   iteration.  */
1754 | movaps %xmm5, %xmm7 |
1755 | palignr $12, %xmm4, %xmm5 |
1756 | test %eax, %eax |
1757 | palignr $12, %xmm3, %xmm4 |
1758 | jnz L(Shl12Start) |
1759 | # ifdef USE_AS_STRNCPY |
1760 | sub $64, %ebx |
1761 | jbe L(StrncpyLeave12) |
1762 | # endif |
/* No hit: finish the remaining merges and store all 64 bytes.  */
1763 | palignr $12, %xmm2, %xmm3 |
1764 | lea 64(%ecx), %ecx |
1765 | palignr $12, %xmm1, %xmm2 |
1766 | movaps %xmm7, %xmm1 |
1767 | movaps %xmm5, 48(%edx) |
1768 | movaps %xmm4, 32(%edx) |
1769 | movaps %xmm3, 16(%edx) |
1770 | movaps %xmm2, (%edx) |
1771 | lea 64(%edx), %edx |
1772 | jmp L(Shl12LoopStart) |
1773 | |
/* L(Shl12LoopExit): copy the 4 bytes at (%ecx) to (%edx) using %esi as
   scratch, then jump to L(CopyFrom1To16Bytes) with %esi = 4; that code
   adds %esi to both pointers before dispatching on the mask in %eax.  */
1774 | L(Shl12LoopExit): |
1775 | movl (%ecx), %esi |
1776 | movl %esi, (%edx) |
1777 | mov $4, %esi |
1778 | jmp L(CopyFrom1To16Bytes) |
1779 | |
/* L(Shl13): copy path that merges neighbouring source vectors with a
   13-byte palignr.  Source data is read with 16-byte loads at -13(%ecx),
   3(%ecx), 19(%ecx); each merged result is stored to (%edx).  Loads and
   stores use movaps, which requires 16-byte aligned addresses.
   NOTE(review): %xmm0 is presumably all-zero on entry (set up before this
   chunk), so each pcmpeqb/pmovmskb pair leaves a mask of zero bytes in
   %eax -- confirm.  Under USE_AS_STRNCPY %ebx is reduced by 16 per step;
   it looks like the remaining byte budget -- confirm against the caller.  */
1780 | .p2align 4 |
1781 | L(Shl13): |
1782 | movaps -13(%ecx), %xmm1 |
1783 | movaps 3(%ecx), %xmm2 |
1784 | L(Shl13Start): |
/* Step 1 of 4: test the newest vector, keep an unshifted copy in %xmm3.  */
1785 | pcmpeqb %xmm2, %xmm0 |
1786 | pmovmskb %xmm0, %eax |
1787 | movaps %xmm2, %xmm3 |
1788 | # ifdef USE_AS_STRNCPY |
1789 | sub $16, %ebx |
1790 | jbe L(StrncpyExit13Case2OrCase3) |
1791 | # endif |
1792 | test %eax, %eax |
1793 | jnz L(Shl13LoopExit) |
1794 | |
/* palignr $13: upper 3 bytes of %xmm1 followed by lower 13 bytes of
   %xmm2 form the 16 bytes written to (%edx).  */
1795 | palignr $13, %xmm1, %xmm2 |
1796 | movaps %xmm3, %xmm1 |
1797 | movaps %xmm2, (%edx) |
1798 | movaps 19(%ecx), %xmm2 |
1799 | |
/* Step 2 of 4.  */
1800 | pcmpeqb %xmm2, %xmm0 |
1801 | lea 16(%edx), %edx |
1802 | pmovmskb %xmm0, %eax |
1803 | lea 16(%ecx), %ecx |
1804 | movaps %xmm2, %xmm3 |
1805 | # ifdef USE_AS_STRNCPY |
1806 | sub $16, %ebx |
1807 | jbe L(StrncpyExit13Case2OrCase3) |
1808 | # endif |
1809 | test %eax, %eax |
1810 | jnz L(Shl13LoopExit) |
1811 | |
1812 | palignr $13, %xmm1, %xmm2 |
1813 | movaps %xmm2, (%edx) |
1814 | movaps 19(%ecx), %xmm2 |
1815 | movaps %xmm3, %xmm1 |
1816 | |
/* Step 3 of 4.  */
1817 | pcmpeqb %xmm2, %xmm0 |
1818 | lea 16(%edx), %edx |
1819 | pmovmskb %xmm0, %eax |
1820 | lea 16(%ecx), %ecx |
1821 | movaps %xmm2, %xmm3 |
1822 | # ifdef USE_AS_STRNCPY |
1823 | sub $16, %ebx |
1824 | jbe L(StrncpyExit13Case2OrCase3) |
1825 | # endif |
1826 | test %eax, %eax |
1827 | jnz L(Shl13LoopExit) |
1828 | |
1829 | palignr $13, %xmm1, %xmm2 |
1830 | movaps %xmm2, (%edx) |
1831 | movaps 19(%ecx), %xmm2 |
1832 | |
/* Step 4 of 4: no copy into %xmm3 this time, so %xmm3 still holds the
   previous unshifted vector and is used as the palignr source below.  */
1833 | pcmpeqb %xmm2, %xmm0 |
1834 | lea 16(%edx), %edx |
1835 | pmovmskb %xmm0, %eax |
1836 | lea 16(%ecx), %ecx |
1837 | # ifdef USE_AS_STRNCPY |
1838 | sub $16, %ebx |
1839 | jbe L(StrncpyExit13Case2OrCase3) |
1840 | # endif |
1841 | test %eax, %eax |
1842 | jnz L(Shl13LoopExit) |
1843 | |
1844 | palignr $13, %xmm3, %xmm2 |
1845 | movaps %xmm2, (%edx) |
1846 | lea 19(%ecx), %ecx |
1847 | lea 16(%edx), %edx |
1848 | |
/* Round the source position down to a 64-byte boundary for the loop:
   %eax = %ecx & 63 is taken back off %edx (and, for strncpy, added back
   to %ebx), then %ecx is biased by -3 so that 3(%ecx) is 64-byte aligned.
   %xmm1 is reloaded with the vector preceding that boundary.  */
1849 | mov %ecx, %eax |
1850 | and $-0x40, %ecx |
1851 | sub %ecx, %eax |
1852 | lea -3(%ecx), %ecx |
1853 | sub %eax, %edx |
1854 | # ifdef USE_AS_STRNCPY |
1855 | add %eax, %ebx |
1856 | # endif |
1857 | movaps -13(%ecx), %xmm1 |
1858 | |
/* L(Shl13LoopStart): main loop of the 13-byte-shift path, 64 source bytes
   per iteration.  Four aligned vectors are loaded, reduced with a bytewise
   unsigned minimum and compared against %xmm0, so %eax is non-zero when
   any of the 64 bytes matches %xmm0 (a zero byte, if %xmm0 is all-zero as
   presumed -- TODO confirm).  On a hit control goes back to L(Shl13Start);
   %xmm1 and %xmm2 are still unshifted at that point.  palignr does not
   modify EFLAGS, so the jnz still sees the result of the test.  */
1859 | L(Shl13LoopStart): |
1860 | movaps 3(%ecx), %xmm2 |
1861 | movaps 19(%ecx), %xmm3 |
1862 | movaps %xmm3, %xmm6 |
1863 | movaps 35(%ecx), %xmm4 |
1864 | movaps %xmm4, %xmm7 |
1865 | movaps 51(%ecx), %xmm5 |
1866 | pminub %xmm2, %xmm6 |
1867 | pminub %xmm5, %xmm7 |
1868 | pminub %xmm6, %xmm7 |
1869 | pcmpeqb %xmm0, %xmm7 |
1870 | pmovmskb %xmm7, %eax |
/* %xmm7 keeps the unshifted last vector; it becomes %xmm1 for the next
   iteration.  */
1871 | movaps %xmm5, %xmm7 |
1872 | palignr $13, %xmm4, %xmm5 |
1873 | test %eax, %eax |
1874 | palignr $13, %xmm3, %xmm4 |
1875 | jnz L(Shl13Start) |
1876 | # ifdef USE_AS_STRNCPY |
1877 | sub $64, %ebx |
1878 | jbe L(StrncpyLeave13) |
1879 | # endif |
/* No hit: finish the remaining merges and store all 64 bytes.  */
1880 | palignr $13, %xmm2, %xmm3 |
1881 | lea 64(%ecx), %ecx |
1882 | palignr $13, %xmm1, %xmm2 |
1883 | movaps %xmm7, %xmm1 |
1884 | movaps %xmm5, 48(%edx) |
1885 | movaps %xmm4, 32(%edx) |
1886 | movaps %xmm3, 16(%edx) |
1887 | movaps %xmm2, (%edx) |
1888 | lea 64(%edx), %edx |
1889 | jmp L(Shl13LoopStart) |
1890 | |
/* L(Shl13LoopExit): copy the 4 bytes at -1(%ecx) to -1(%edx) (offsets
   -1..2) using %esi as scratch, then jump to L(CopyFrom1To16Bytes) with
   %esi = 3; that code adds %esi to both pointers before dispatching on
   the mask in %eax.  */
1891 | L(Shl13LoopExit): |
1892 | movl -1(%ecx), %esi |
1893 | movl %esi, -1(%edx) |
1894 | mov $3, %esi |
1895 | jmp L(CopyFrom1To16Bytes) |
1896 | |
/* L(Shl14): copy path that merges neighbouring source vectors with a
   14-byte palignr.  Source data is read with 16-byte loads at -14(%ecx),
   2(%ecx), 18(%ecx); each merged result is stored to (%edx).  Loads and
   stores use movaps, which requires 16-byte aligned addresses.
   NOTE(review): %xmm0 is presumably all-zero on entry (set up before this
   chunk), so each pcmpeqb/pmovmskb pair leaves a mask of zero bytes in
   %eax -- confirm.  Under USE_AS_STRNCPY %ebx is reduced by 16 per step;
   it looks like the remaining byte budget -- confirm against the caller.  */
1897 | .p2align 4 |
1898 | L(Shl14): |
1899 | movaps -14(%ecx), %xmm1 |
1900 | movaps 2(%ecx), %xmm2 |
1901 | L(Shl14Start): |
/* Step 1 of 4: test the newest vector, keep an unshifted copy in %xmm3.  */
1902 | pcmpeqb %xmm2, %xmm0 |
1903 | pmovmskb %xmm0, %eax |
1904 | movaps %xmm2, %xmm3 |
1905 | # ifdef USE_AS_STRNCPY |
1906 | sub $16, %ebx |
1907 | jbe L(StrncpyExit14Case2OrCase3) |
1908 | # endif |
1909 | test %eax, %eax |
1910 | jnz L(Shl14LoopExit) |
1911 | |
/* palignr $14: upper 2 bytes of %xmm1 followed by lower 14 bytes of
   %xmm2 form the 16 bytes written to (%edx).  */
1912 | palignr $14, %xmm1, %xmm2 |
1913 | movaps %xmm3, %xmm1 |
1914 | movaps %xmm2, (%edx) |
1915 | movaps 18(%ecx), %xmm2 |
1916 | |
/* Step 2 of 4.  */
1917 | pcmpeqb %xmm2, %xmm0 |
1918 | lea 16(%edx), %edx |
1919 | pmovmskb %xmm0, %eax |
1920 | lea 16(%ecx), %ecx |
1921 | movaps %xmm2, %xmm3 |
1922 | # ifdef USE_AS_STRNCPY |
1923 | sub $16, %ebx |
1924 | jbe L(StrncpyExit14Case2OrCase3) |
1925 | # endif |
1926 | test %eax, %eax |
1927 | jnz L(Shl14LoopExit) |
1928 | |
1929 | palignr $14, %xmm1, %xmm2 |
1930 | movaps %xmm2, (%edx) |
1931 | movaps 18(%ecx), %xmm2 |
1932 | movaps %xmm3, %xmm1 |
1933 | |
/* Step 3 of 4.  */
1934 | pcmpeqb %xmm2, %xmm0 |
1935 | lea 16(%edx), %edx |
1936 | pmovmskb %xmm0, %eax |
1937 | lea 16(%ecx), %ecx |
1938 | movaps %xmm2, %xmm3 |
1939 | # ifdef USE_AS_STRNCPY |
1940 | sub $16, %ebx |
1941 | jbe L(StrncpyExit14Case2OrCase3) |
1942 | # endif |
1943 | test %eax, %eax |
1944 | jnz L(Shl14LoopExit) |
1945 | |
1946 | palignr $14, %xmm1, %xmm2 |
1947 | movaps %xmm2, (%edx) |
1948 | movaps 18(%ecx), %xmm2 |
1949 | |
/* Step 4 of 4: no copy into %xmm3 this time, so %xmm3 still holds the
   previous unshifted vector and is used as the palignr source below.  */
1950 | pcmpeqb %xmm2, %xmm0 |
1951 | lea 16(%edx), %edx |
1952 | pmovmskb %xmm0, %eax |
1953 | lea 16(%ecx), %ecx |
1954 | # ifdef USE_AS_STRNCPY |
1955 | sub $16, %ebx |
1956 | jbe L(StrncpyExit14Case2OrCase3) |
1957 | # endif |
1958 | test %eax, %eax |
1959 | jnz L(Shl14LoopExit) |
1960 | |
1961 | palignr $14, %xmm3, %xmm2 |
1962 | movaps %xmm2, (%edx) |
1963 | lea 18(%ecx), %ecx |
1964 | lea 16(%edx), %edx |
1965 | |
/* Round the source position down to a 64-byte boundary for the loop:
   %eax = %ecx & 63 is taken back off %edx (and, for strncpy, added back
   to %ebx), then %ecx is biased by -2 so that 2(%ecx) is 64-byte aligned.
   %xmm1 is reloaded with the vector preceding that boundary.  */
1966 | mov %ecx, %eax |
1967 | and $-0x40, %ecx |
1968 | sub %ecx, %eax |
1969 | lea -2(%ecx), %ecx |
1970 | sub %eax, %edx |
1971 | # ifdef USE_AS_STRNCPY |
1972 | add %eax, %ebx |
1973 | # endif |
1974 | movaps -14(%ecx), %xmm1 |
1975 | |
/* L(Shl14LoopStart): main loop of the 14-byte-shift path, 64 source bytes
   per iteration.  Four aligned vectors are loaded, reduced with a bytewise
   unsigned minimum and compared against %xmm0, so %eax is non-zero when
   any of the 64 bytes matches %xmm0 (a zero byte, if %xmm0 is all-zero as
   presumed -- TODO confirm).  On a hit control goes back to L(Shl14Start);
   %xmm1 and %xmm2 are still unshifted at that point.  palignr does not
   modify EFLAGS, so the jnz still sees the result of the test.  */
1976 | L(Shl14LoopStart): |
1977 | movaps 2(%ecx), %xmm2 |
1978 | movaps 18(%ecx), %xmm3 |
1979 | movaps %xmm3, %xmm6 |
1980 | movaps 34(%ecx), %xmm4 |
1981 | movaps %xmm4, %xmm7 |
1982 | movaps 50(%ecx), %xmm5 |
1983 | pminub %xmm2, %xmm6 |
1984 | pminub %xmm5, %xmm7 |
1985 | pminub %xmm6, %xmm7 |
1986 | pcmpeqb %xmm0, %xmm7 |
1987 | pmovmskb %xmm7, %eax |
/* %xmm7 keeps the unshifted last vector; it becomes %xmm1 for the next
   iteration.  */
1988 | movaps %xmm5, %xmm7 |
1989 | palignr $14, %xmm4, %xmm5 |
1990 | test %eax, %eax |
1991 | palignr $14, %xmm3, %xmm4 |
1992 | jnz L(Shl14Start) |
1993 | # ifdef USE_AS_STRNCPY |
1994 | sub $64, %ebx |
1995 | jbe L(StrncpyLeave14) |
1996 | # endif |
/* No hit: finish the remaining merges and store all 64 bytes.  */
1997 | palignr $14, %xmm2, %xmm3 |
1998 | lea 64(%ecx), %ecx |
1999 | palignr $14, %xmm1, %xmm2 |
2000 | movaps %xmm7, %xmm1 |
2001 | movaps %xmm5, 48(%edx) |
2002 | movaps %xmm4, 32(%edx) |
2003 | movaps %xmm3, 16(%edx) |
2004 | movaps %xmm2, (%edx) |
2005 | lea 64(%edx), %edx |
2006 | jmp L(Shl14LoopStart) |
2007 | |
/* L(Shl14LoopExit): copy the 4 bytes at -2(%ecx) to -2(%edx) (offsets
   -2..1) using %esi as scratch, then jump to L(CopyFrom1To16Bytes) with
   %esi = 2; that code adds %esi to both pointers before dispatching on
   the mask in %eax.  */
2008 | L(Shl14LoopExit): |
2009 | movl -2(%ecx), %esi |
2010 | movl %esi, -2(%edx) |
2011 | mov $2, %esi |
2012 | jmp L(CopyFrom1To16Bytes) |
2013 | |
/* L(Shl15): copy path that merges neighbouring source vectors with a
   15-byte palignr.  Source data is read with 16-byte loads at -15(%ecx),
   1(%ecx), 17(%ecx); each merged result is stored to (%edx).  Loads and
   stores use movaps, which requires 16-byte aligned addresses.
   NOTE(review): %xmm0 is presumably all-zero on entry (set up before this
   chunk), so each pcmpeqb/pmovmskb pair leaves a mask of zero bytes in
   %eax -- confirm.  Under USE_AS_STRNCPY %ebx is reduced by 16 per step;
   it looks like the remaining byte budget -- confirm against the caller.  */
2014 | .p2align 4 |
2015 | L(Shl15): |
2016 | movaps -15(%ecx), %xmm1 |
2017 | movaps 1(%ecx), %xmm2 |
2018 | L(Shl15Start): |
/* Step 1 of 4: test the newest vector, keep an unshifted copy in %xmm3.  */
2019 | pcmpeqb %xmm2, %xmm0 |
2020 | pmovmskb %xmm0, %eax |
2021 | movaps %xmm2, %xmm3 |
2022 | # ifdef USE_AS_STRNCPY |
2023 | sub $16, %ebx |
2024 | jbe L(StrncpyExit15Case2OrCase3) |
2025 | # endif |
2026 | test %eax, %eax |
2027 | jnz L(Shl15LoopExit) |
2028 | |
/* palignr $15: the top byte of %xmm1 followed by lower 15 bytes of
   %xmm2 form the 16 bytes written to (%edx).  */
2029 | palignr $15, %xmm1, %xmm2 |
2030 | movaps %xmm3, %xmm1 |
2031 | movaps %xmm2, (%edx) |
2032 | movaps 17(%ecx), %xmm2 |
2033 | |
/* Step 2 of 4.  */
2034 | pcmpeqb %xmm2, %xmm0 |
2035 | lea 16(%edx), %edx |
2036 | pmovmskb %xmm0, %eax |
2037 | lea 16(%ecx), %ecx |
2038 | movaps %xmm2, %xmm3 |
2039 | # ifdef USE_AS_STRNCPY |
2040 | sub $16, %ebx |
2041 | jbe L(StrncpyExit15Case2OrCase3) |
2042 | # endif |
2043 | test %eax, %eax |
2044 | jnz L(Shl15LoopExit) |
2045 | |
2046 | palignr $15, %xmm1, %xmm2 |
2047 | movaps %xmm2, (%edx) |
2048 | movaps 17(%ecx), %xmm2 |
2049 | movaps %xmm3, %xmm1 |
2050 | |
/* Step 3 of 4.  */
2051 | pcmpeqb %xmm2, %xmm0 |
2052 | lea 16(%edx), %edx |
2053 | pmovmskb %xmm0, %eax |
2054 | lea 16(%ecx), %ecx |
2055 | movaps %xmm2, %xmm3 |
2056 | # ifdef USE_AS_STRNCPY |
2057 | sub $16, %ebx |
2058 | jbe L(StrncpyExit15Case2OrCase3) |
2059 | # endif |
2060 | test %eax, %eax |
2061 | jnz L(Shl15LoopExit) |
2062 | |
2063 | palignr $15, %xmm1, %xmm2 |
2064 | movaps %xmm2, (%edx) |
2065 | movaps 17(%ecx), %xmm2 |
2066 | |
/* Step 4 of 4: no copy into %xmm3 this time, so %xmm3 still holds the
   previous unshifted vector and is used as the palignr source below.  */
2067 | pcmpeqb %xmm2, %xmm0 |
2068 | lea 16(%edx), %edx |
2069 | pmovmskb %xmm0, %eax |
2070 | lea 16(%ecx), %ecx |
2071 | # ifdef USE_AS_STRNCPY |
2072 | sub $16, %ebx |
2073 | jbe L(StrncpyExit15Case2OrCase3) |
2074 | # endif |
2075 | test %eax, %eax |
2076 | jnz L(Shl15LoopExit) |
2077 | |
2078 | palignr $15, %xmm3, %xmm2 |
2079 | movaps %xmm2, (%edx) |
2080 | lea 17(%ecx), %ecx |
2081 | lea 16(%edx), %edx |
2082 | |
/* Round the source position down to a 64-byte boundary for the loop:
   %eax = %ecx & 63 is taken back off %edx (and, for strncpy, added back
   to %ebx), then %ecx is biased by -1 so that 1(%ecx) is 64-byte aligned.
   %xmm1 is reloaded with the vector preceding that boundary.  */
2083 | mov %ecx, %eax |
2084 | and $-0x40, %ecx |
2085 | sub %ecx, %eax |
2086 | lea -1(%ecx), %ecx |
2087 | sub %eax, %edx |
2088 | # ifdef USE_AS_STRNCPY |
2089 | add %eax, %ebx |
2090 | # endif |
2091 | movaps -15(%ecx), %xmm1 |
2092 | |
/* L(Shl15LoopStart): main loop of the 15-byte-shift path, 64 source bytes
   per iteration.  Four aligned vectors are loaded, reduced with a bytewise
   unsigned minimum and compared against %xmm0, so %eax is non-zero when
   any of the 64 bytes matches %xmm0 (a zero byte, if %xmm0 is all-zero as
   presumed -- TODO confirm).  On a hit control goes back to L(Shl15Start);
   %xmm1 and %xmm2 are still unshifted at that point.  palignr does not
   modify EFLAGS, so the jnz still sees the result of the test.  */
2093 | L(Shl15LoopStart): |
2094 | movaps 1(%ecx), %xmm2 |
2095 | movaps 17(%ecx), %xmm3 |
2096 | movaps %xmm3, %xmm6 |
2097 | movaps 33(%ecx), %xmm4 |
2098 | movaps %xmm4, %xmm7 |
2099 | movaps 49(%ecx), %xmm5 |
2100 | pminub %xmm2, %xmm6 |
2101 | pminub %xmm5, %xmm7 |
2102 | pminub %xmm6, %xmm7 |
2103 | pcmpeqb %xmm0, %xmm7 |
2104 | pmovmskb %xmm7, %eax |
/* %xmm7 keeps the unshifted last vector; it becomes %xmm1 for the next
   iteration.  */
2105 | movaps %xmm5, %xmm7 |
2106 | palignr $15, %xmm4, %xmm5 |
2107 | test %eax, %eax |
2108 | palignr $15, %xmm3, %xmm4 |
2109 | jnz L(Shl15Start) |
2110 | # ifdef USE_AS_STRNCPY |
2111 | sub $64, %ebx |
2112 | jbe L(StrncpyLeave15) |
2113 | # endif |
/* No hit: finish the remaining merges and store all 64 bytes.  */
2114 | palignr $15, %xmm2, %xmm3 |
2115 | lea 64(%ecx), %ecx |
2116 | palignr $15, %xmm1, %xmm2 |
2117 | movaps %xmm7, %xmm1 |
2118 | movaps %xmm5, 48(%edx) |
2119 | movaps %xmm4, 32(%edx) |
2120 | movaps %xmm3, 16(%edx) |
2121 | movaps %xmm2, (%edx) |
2122 | lea 64(%edx), %edx |
2123 | jmp L(Shl15LoopStart) |
2124 | |
/* L(Shl15LoopExit): copy the 4 bytes at -3(%ecx) to -3(%edx) (offsets
   -3..0) using %esi as scratch and set %esi = 1 for
   L(CopyFrom1To16Bytes).  The explicit jump is only assembled when
   USE_AS_STRCAT is defined; otherwise execution falls through into the
   L(CopyFrom1To16Bytes) code that follows in this file.  */
2125 | L(Shl15LoopExit): |
2126 | movl -3(%ecx), %esi |
2127 | movl %esi, -3(%edx) |
2128 | mov $1, %esi |
2129 | # ifdef USE_AS_STRCAT |
2130 | jmp L(CopyFrom1To16Bytes) |
2131 | # endif |
2132 | |
2133 | |
2134 | # ifndef USE_AS_STRCAT |
2135 | |
/* L(CopyFrom1To16Bytes): common tail for the ShlN exits.
   In:  %eax = 16-bit byte mask from pmovmskb, %esi = offset to add to
        both pointers, %ecx/%edx = source/destination, and a saved %esi
        on the stack (restored by POP here).
   Under USE_AS_STRNCPY, 16 is added back to %ebx first.
   Dispatches to L(ExitN) where N is the 1-based position of the lowest
   set mask bit: bits 0..2 go to Exit1..Exit3, bit 3 falls through to the
   L(Exit4) code that follows; bits 4..7 are handled by L(ExitHigh4) and
   bits 8..15 by L(ExitHigh8).  */
2136 | .p2align 4 |
2137 | L(CopyFrom1To16Bytes): |
2138 | # ifdef USE_AS_STRNCPY |
2139 | add $16, %ebx |
2140 | # endif |
2141 | add %esi, %edx |
2142 | add %esi, %ecx |
2143 | |
2144 | POP (%esi) |
/* Low mask byte empty: the first hit is among bytes 8..15.  */
2145 | test %al, %al |
2146 | jz L(ExitHigh8) |
2147 | |
2148 | L(CopyFrom1To16BytesLess8): |
/* %ah is used as scratch for (%al & 15): zero means bits 0..3 are all
   clear, so the first hit is among bytes 4..7.  */
2149 | mov %al, %ah |
2150 | and $15, %ah |
2151 | jz L(ExitHigh4) |
2152 | |
2153 | test $0x01, %al |
2154 | jnz L(Exit1) |
2155 | test $0x02, %al |
2156 | jnz L(Exit2) |
2157 | test $0x04, %al |
2158 | jnz L(Exit3) |
2159 | |
/* L(Exit4): copy 4 bytes from (%ecx) to (%edx) and return.
   SAVE_RESULT (3) sets %eax to 3(%edx) for stpcpy builds, otherwise to
   %edi.  Under USE_AS_STRNCPY, 4 is subtracted from %ebx and, if the
   result is non-zero, control goes to L(StrncpyFillTailWithZero1) with
   %ecx = 4(%edx) (lea leaves the flags from sub intact).  For stpncpy,
   cmpb/sbb advance %eax by one unless the byte at (%eax) is zero.  */
2160 | .p2align 4 |
2161 | L(Exit4): |
2162 | movl (%ecx), %eax |
2163 | movl %eax, (%edx) |
2164 | SAVE_RESULT (3) |
2165 | # ifdef USE_AS_STRNCPY |
2166 | sub $4, %ebx |
2167 | lea 4(%edx), %ecx |
2168 | jnz L(StrncpyFillTailWithZero1) |
2169 | # ifdef USE_AS_STPCPY |
2170 | cmpb $1, (%eax) |
2171 | sbb $-1, %eax |
2172 | # endif |
2173 | # endif |
2174 | RETURN1 |
2175 | |
/* L(ExitHigh4): mask bits 0..3 are clear; select the exit for the lowest
   set bit among bits 4..6 (Exit5..Exit7).  If none of them is set,
   execution falls through to the L(Exit8) code that follows.  */
2176 | .p2align 4 |
2177 | L(ExitHigh4): |
2178 | test $0x10, %al |
2179 | jnz L(Exit5) |
2180 | test $0x20, %al |
2181 | jnz L(Exit6) |
2182 | test $0x40, %al |
2183 | jnz L(Exit7) |
2184 | |
/* L(Exit8): copy 8 bytes from (%ecx) to (%edx) through %xmm0 and return.
   SAVE_RESULT (7) sets %eax to 7(%edx) for stpcpy builds, otherwise to
   %edi.  Under USE_AS_STRNCPY, 8 is subtracted from %ebx and, if the
   result is non-zero, control goes to L(StrncpyFillTailWithZero1) with
   %ecx = 8(%edx) (lea leaves the flags from sub intact).  For stpncpy,
   cmpb/sbb advance %eax by one unless the byte at (%eax) is zero.  */
2185 | .p2align 4 |
2186 | L(Exit8): |
2187 | movlpd (%ecx), %xmm0 |
2188 | movlpd %xmm0, (%edx) |
2189 | SAVE_RESULT (7) |
2190 | # ifdef USE_AS_STRNCPY |
2191 | sub $8, %ebx |
2192 | lea 8(%edx), %ecx |
2193 | jnz L(StrncpyFillTailWithZero1) |
2194 | # ifdef USE_AS_STPCPY |
2195 | cmpb $1, (%eax) |
2196 | sbb $-1, %eax |
2197 | # endif |
2198 | # endif |
2199 | RETURN1 |
2200 | |
/* L(ExitHigh8): the low mask byte (%al) was zero, so the first hit is in
   %ah (bytes 8..15).  %al is reused as scratch for (%ah & 15): zero sends
   control to L(ExitHigh12); otherwise bits 0..2 of %ah select
   Exit9..Exit11 and bit 3 falls through to the L(Exit12) code below.  */
2201 | .p2align 4 |
2202 | L(ExitHigh8): |
2203 | mov %ah, %al |
2204 | and $15, %al |
2205 | jz L(ExitHigh12) |
2206 | |
2207 | test $0x01, %ah |
2208 | jnz L(Exit9) |
2209 | test $0x02, %ah |
2210 | jnz L(Exit10) |
2211 | test $0x04, %ah |
2212 | jnz L(Exit11) |
2213 | |
/* L(Exit12): copy 12 bytes (8 via %xmm0, 4 via %eax) from (%ecx) to
   (%edx) and return.  SAVE_RESULT (11) sets %eax to 11(%edx) for stpcpy
   builds, otherwise to %edi.  Under USE_AS_STRNCPY, 12 is subtracted from
   %ebx and, if the result is non-zero, control goes to
   L(StrncpyFillTailWithZero1) with %ecx = 12(%edx).  For stpncpy,
   cmpb/sbb advance %eax by one unless the byte at (%eax) is zero.  */
2214 | .p2align 4 |
2215 | L(Exit12): |
2216 | movlpd (%ecx), %xmm0 |
2217 | movl 8(%ecx), %eax |
2218 | movlpd %xmm0, (%edx) |
2219 | movl %eax, 8(%edx) |
2220 | SAVE_RESULT (11) |
2221 | # ifdef USE_AS_STRNCPY |
2222 | sub $12, %ebx |
2223 | lea 12(%edx), %ecx |
2224 | jnz L(StrncpyFillTailWithZero1) |
2225 | # ifdef USE_AS_STPCPY |
2226 | cmpb $1, (%eax) |
2227 | sbb $-1, %eax |
2228 | # endif |
2229 | # endif |
2230 | RETURN1 |
2231 | |
/* L(ExitHigh12): bits 0..3 of %ah are clear; select the exit for the
   lowest set bit among bits 4..6 of %ah (Exit13..Exit15).  If none of
   them is set, execution falls through to the L(Exit16) code below.  */
2232 | .p2align 4 |
2233 | L(ExitHigh12): |
2234 | test $0x10, %ah |
2235 | jnz L(Exit13) |
2236 | test $0x20, %ah |
2237 | jnz L(Exit14) |
2238 | test $0x40, %ah |
2239 | jnz L(Exit15) |
2240 | |
/* L(Exit16): copy 16 bytes from (%ecx) to (%edx) with unaligned
   loads/stores (movdqu) and return.  SAVE_RESULT (15) sets %eax to
   15(%edx) for stpcpy builds, otherwise to %edi.  Under USE_AS_STRNCPY,
   16 is subtracted from %ebx and, if the result is non-zero, control goes
   to L(StrncpyFillTailWithZero1) with %ecx = 16(%edx).  For stpncpy,
   cmpb/sbb advance %eax by one unless the byte at (%eax) is zero.  */
2241 | .p2align 4 |
2242 | L(Exit16): |
2243 | movdqu (%ecx), %xmm0 |
2244 | movdqu %xmm0, (%edx) |
2245 | SAVE_RESULT (15) |
2246 | # ifdef USE_AS_STRNCPY |
2247 | sub $16, %ebx |
2248 | lea 16(%edx), %ecx |
2249 | jnz L(StrncpyFillTailWithZero1) |
2250 | # ifdef USE_AS_STPCPY |
2251 | cmpb $1, (%eax) |
2252 | sbb $-1, %eax |
2253 | # endif |
2254 | # endif |
2255 | RETURN1 |
2256 | |
2257 | # ifdef USE_AS_STRNCPY |
2258 | |
/* CFI_PUSH emits only unwind directives (no instruction): code entering
   at the label below still has %esi saved on the stack, which the POP
   further down restores.  */
2259 | CFI_PUSH(%esi) |
2260 | |
/* L(CopyFrom1To16BytesCase2): strncpy tail used when the mask in %eax is
   non-zero and the count in %ebx also has to be honoured.  After adding 16
   back to %ebx and advancing both pointers by %esi, the exit is chosen
   by whichever comes first: a set mask bit or %ebx reaching that byte
   count.  */
2261 | .p2align 4 |
2262 | L(CopyFrom1To16BytesCase2): |
2263 | add $16, %ebx |
2264 | add %esi, %ecx |
2265 | add %esi, %edx |
2266 | |
2267 | POP (%esi) |
2268 | |
2269 | test %al, %al |
2270 | jz L(ExitHighCase2) |
2271 | |
/* A hit exists in bytes 0..7; with more than 8 bytes of count left the
   count cannot win, so the plain mask dispatch is used.  */
2272 | cmp $8, %ebx |
2273 | ja L(CopyFrom1To16BytesLess8) |
2274 | |
2275 | test $0x01, %al |
2276 | jnz L(Exit1) |
2277 | cmp $1, %ebx |
2278 | je L(Exit1) |
2279 | test $0x02, %al |
2280 | jnz L(Exit2) |
2281 | cmp $2, %ebx |
2282 | je L(Exit2) |
2283 | test $0x04, %al |
2284 | jnz L(Exit3) |
2285 | cmp $3, %ebx |
2286 | je L(Exit3) |
2287 | test $0x08, %al |
2288 | jnz L(Exit4) |
2289 | cmp $4, %ebx |
2290 | je L(Exit4) |
2291 | test $0x10, %al |
2292 | jnz L(Exit5) |
2293 | cmp $5, %ebx |
2294 | je L(Exit5) |
2295 | test $0x20, %al |
2296 | jnz L(Exit6) |
2297 | cmp $6, %ebx |
2298 | je L(Exit6) |
2299 | test $0x40, %al |
2300 | jnz L(Exit7) |
2301 | cmp $7, %ebx |
2302 | je L(Exit7) |
2303 | jmp L(Exit8) |
2304 | |
/* L(ExitHighCase2): the low mask byte is zero, so any hit is in bytes
   8..15.  If %ebx <= 8 the count ends first and the count-only dispatch
   L(CopyFrom1To16BytesLess8Case3) is used.  Otherwise each step checks
   the next bit of %ah and then whether %ebx equals that byte count;
   Exit16 is the default.  */
2305 | .p2align 4 |
2306 | L(ExitHighCase2): |
2307 | cmp $8, %ebx |
2308 | jbe L(CopyFrom1To16BytesLess8Case3) |
2309 | |
2310 | test $0x01, %ah |
2311 | jnz L(Exit9) |
2312 | cmp $9, %ebx |
2313 | je L(Exit9) |
2314 | test $0x02, %ah |
2315 | jnz L(Exit10) |
2316 | cmp $10, %ebx |
2317 | je L(Exit10) |
2318 | test $0x04, %ah |
2319 | jnz L(Exit11) |
2320 | cmp $11, %ebx |
2321 | je L(Exit11) |
2322 | test $0x8, %ah |
2323 | jnz L(Exit12) |
2324 | cmp $12, %ebx |
2325 | je L(Exit12) |
2326 | test $0x10, %ah |
2327 | jnz L(Exit13) |
2328 | cmp $13, %ebx |
2329 | je L(Exit13) |
2330 | test $0x20, %ah |
2331 | jnz L(Exit14) |
2332 | cmp $14, %ebx |
2333 | je L(Exit14) |
2334 | test $0x40, %ah |
2335 | jnz L(Exit15) |
2336 | cmp $15, %ebx |
2337 | je L(Exit15) |
2338 | jmp L(Exit16) |
2339 | |
/* CFI_PUSH emits only unwind directives (no instruction): code entering
   at the label below still has %esi saved on the stack.  */
2340 | CFI_PUSH(%esi) |
2341 | |
/* L(CopyFrom1To16BytesCase2OrCase3): a non-zero mask in %eax selects
   L(CopyFrom1To16BytesCase2); a zero mask falls through to the
   L(CopyFrom1To16BytesCase3) code that follows.  */
2342 | .p2align 4 |
2343 | L(CopyFrom1To16BytesCase2OrCase3): |
2344 | test %eax, %eax |
2345 | jnz L(CopyFrom1To16BytesCase2) |
2346 | |
/* L(CopyFrom1To16BytesCase3): strncpy tail that dispatches on the count
   in %ebx alone (the mask in %eax is not consulted here).  16 is added
   back to %ebx, both pointers are advanced by %esi and the saved %esi is
   restored.  Counts above 8 go to L(ExitHigh8Case3), counts 5..8 to
   L(ExitHigh4Case3), counts 1..3 to Exit1..Exit3; anything else copies
   4 bytes inline and returns.  The inline copy uses SAVE_RESULT (4),
   unlike L(Exit4) which uses SAVE_RESULT (3), and does not branch to the
   zero-fill code.  */
2347 | .p2align 4 |
2348 | L(CopyFrom1To16BytesCase3): |
2349 | add $16, %ebx |
2350 | add %esi, %edx |
2351 | add %esi, %ecx |
2352 | |
2353 | POP (%esi) |
2354 | |
2355 | cmp $8, %ebx |
2356 | ja L(ExitHigh8Case3) |
2357 | |
2358 | L(CopyFrom1To16BytesLess8Case3): |
2359 | cmp $4, %ebx |
2360 | ja L(ExitHigh4Case3) |
2361 | |
2362 | cmp $1, %ebx |
2363 | je L(Exit1) |
2364 | cmp $2, %ebx |
2365 | je L(Exit2) |
2366 | cmp $3, %ebx |
2367 | je L(Exit3) |
2368 | movl (%ecx), %eax |
2369 | movl %eax, (%edx) |
2370 | SAVE_RESULT (4) |
2371 | RETURN1 |
2372 | |
/* L(ExitHigh4Case3): count-only dispatch for %ebx in 5..8.  Counts 5..7
   go to Exit5..Exit7; otherwise 8 bytes are copied inline through %xmm0,
   the result is set with SAVE_RESULT (8) and the function returns.  */
2373 | .p2align 4 |
2374 | L(ExitHigh4Case3): |
2375 | cmp $5, %ebx |
2376 | je L(Exit5) |
2377 | cmp $6, %ebx |
2378 | je L(Exit6) |
2379 | cmp $7, %ebx |
2380 | je L(Exit7) |
2381 | movlpd (%ecx), %xmm0 |
2382 | movlpd %xmm0, (%edx) |
2383 | SAVE_RESULT (8) |
2384 | RETURN1 |
2385 | |
/* L(ExitHigh8Case3): count-only dispatch for %ebx above 8.  Counts above
   12 go to L(ExitHigh12Case3), counts 9..11 to Exit9..Exit11; otherwise
   12 bytes are copied inline (8 via %xmm0, 4 via %eax), the result is
   set with SAVE_RESULT (12) and the function returns.  */
2386 | .p2align 4 |
2387 | L(ExitHigh8Case3): |
2388 | cmp $12, %ebx |
2389 | ja L(ExitHigh12Case3) |
2390 | |
2391 | cmp $9, %ebx |
2392 | je L(Exit9) |
2393 | cmp $10, %ebx |
2394 | je L(Exit10) |
2395 | cmp $11, %ebx |
2396 | je L(Exit11) |
2397 | movlpd (%ecx), %xmm0 |
2398 | movl 8(%ecx), %eax |
2399 | movlpd %xmm0, (%edx) |
2400 | movl %eax, 8(%edx) |
2401 | SAVE_RESULT (12) |
2402 | RETURN1 |
2403 | |
/* L(ExitHigh12Case3): count-only dispatch for %ebx above 12.  Counts
   13..15 go to Exit13..Exit15; otherwise 16 bytes are copied inline as
   two 8-byte halves through %xmm0/%xmm1, the result is set with
   SAVE_RESULT (16) and the function returns.  */
2404 | .p2align 4 |
2405 | L(ExitHigh12Case3): |
2406 | cmp $13, %ebx |
2407 | je L(Exit13) |
2408 | cmp $14, %ebx |
2409 | je L(Exit14) |
2410 | cmp $15, %ebx |
2411 | je L(Exit15) |
2412 | movlpd (%ecx), %xmm0 |
2413 | movlpd 8(%ecx), %xmm1 |
2414 | movlpd %xmm0, (%edx) |
2415 | movlpd %xmm1, 8(%edx) |
2416 | SAVE_RESULT (16) |
2417 | RETURN1 |
2418 | |
2419 | # endif |
2420 | |
/* L(Exit1): copy 1 byte from (%ecx) to (%edx) and return.
   SAVE_RESULT (0) sets %eax to (%edx) for stpcpy builds, otherwise to
   %edi.  Under USE_AS_STRNCPY, 1 is subtracted from %ebx and, if the
   result is non-zero, control goes to L(StrncpyFillTailWithZero1) with
   %ecx = 1(%edx) (lea leaves the flags from sub intact).  For stpncpy,
   cmpb/sbb advance %eax by one unless the byte at (%eax) is zero.  */
2421 | .p2align 4 |
2422 | L(Exit1): |
2423 | movb (%ecx), %al |
2424 | movb %al, (%edx) |
2425 | SAVE_RESULT (0) |
2426 | # ifdef USE_AS_STRNCPY |
2427 | sub $1, %ebx |
2428 | lea 1(%edx), %ecx |
2429 | jnz L(StrncpyFillTailWithZero1) |
2430 | # ifdef USE_AS_STPCPY |
2431 | cmpb $1, (%eax) |
2432 | sbb $-1, %eax |
2433 | # endif |
2434 | # endif |
2435 | RETURN1 |
2436 | |
/* L(Exit2): copy 2 bytes from (%ecx) to (%edx) and return.
   SAVE_RESULT (1) sets %eax to 1(%edx) for stpcpy builds, otherwise to
   %edi.  Under USE_AS_STRNCPY, 2 is subtracted from %ebx and, if the
   result is non-zero, control goes to L(StrncpyFillTailWithZero1) with
   %ecx = 2(%edx) (lea leaves the flags from sub intact).  For stpncpy,
   cmpb/sbb advance %eax by one unless the byte at (%eax) is zero.  */
2437 | .p2align 4 |
2438 | L(Exit2): |
2439 | movw (%ecx), %ax |
2440 | movw %ax, (%edx) |
2441 | SAVE_RESULT (1) |
2442 | # ifdef USE_AS_STRNCPY |
2443 | sub $2, %ebx |
2444 | lea 2(%edx), %ecx |
2445 | jnz L(StrncpyFillTailWithZero1) |
2446 | # ifdef USE_AS_STPCPY |
2447 | cmpb $1, (%eax) |
2448 | sbb $-1, %eax |
2449 | # endif |
2450 | # endif |
2451 | RETURN1 |
2452 | |
/* L(Exit3): copy 3 bytes (a 2-byte move plus a 1-byte move) from (%ecx)
   to (%edx) and return.  SAVE_RESULT (2) sets %eax to 2(%edx) for stpcpy
   builds, otherwise to %edi.  Under USE_AS_STRNCPY, 3 is subtracted from
   %ebx and, if the result is non-zero, control goes to
   L(StrncpyFillTailWithZero1) with %ecx = 3(%edx).  For stpncpy,
   cmpb/sbb advance %eax by one unless the byte at (%eax) is zero.  */
2453 | .p2align 4 |
2454 | L(Exit3): |
2455 | movw (%ecx), %ax |
2456 | movw %ax, (%edx) |
2457 | movb 2(%ecx), %al |
2458 | movb %al, 2(%edx) |
2459 | SAVE_RESULT (2) |
2460 | # ifdef USE_AS_STRNCPY |
2461 | sub $3, %ebx |
2462 | lea 3(%edx), %ecx |
2463 | jnz L(StrncpyFillTailWithZero1) |
2464 | # ifdef USE_AS_STPCPY |
2465 | cmpb $1, (%eax) |
2466 | sbb $-1, %eax |
2467 | # endif |
2468 | # endif |
2469 | RETURN1 |
2470 | |
/* Copy 5 bytes (a 4-byte move followed by a 1-byte move) from (%ecx)
   to (%edx) and return through RETURN1 (which pops %edi).  %eax gets
   %edx + 4 in stpcpy builds and %edi otherwise.  strncpy builds
   subtract 5 from %ebx and, if count is left over, zero-fill from
   %edx + 5 via L(StrncpyFillTailWithZero1); the lea leaves the flags of
   the sub intact for the jnz.  stpncpy then adds one to %eax unless the
   byte it points at is NUL (cmpb sets CF only for a zero byte, and
   sbb $-1 adds 1 - CF).  */
2471 | .p2align 4 |
2472 | L(Exit5): |
2473 | movl (%ecx), %eax |
2474 | movl %eax, (%edx) |
2475 | movb 4(%ecx), %al |
2476 | movb %al, 4(%edx) |
2477 | SAVE_RESULT (4) |
2478 | # ifdef USE_AS_STRNCPY |
2479 | sub $5, %ebx |
2480 | lea 5(%edx), %ecx |
2481 | jnz L(StrncpyFillTailWithZero1) |
2482 | # ifdef USE_AS_STPCPY |
2483 | cmpb $1, (%eax) |
2484 | sbb $-1, %eax |
2485 | # endif |
2486 | # endif |
2487 | RETURN1 |
2488 | |
/* Copy 6 bytes (a 4-byte move followed by a 2-byte move) from (%ecx)
   to (%edx) and return through RETURN1 (which pops %edi).  %eax gets
   %edx + 5 in stpcpy builds and %edi otherwise.  strncpy builds
   subtract 6 from %ebx and, if count is left over, zero-fill from
   %edx + 6 via L(StrncpyFillTailWithZero1); the lea leaves the flags of
   the sub intact for the jnz.  stpncpy then adds one to %eax unless the
   byte it points at is NUL (cmpb sets CF only for a zero byte, and
   sbb $-1 adds 1 - CF).  */
2489 | .p2align 4 |
2490 | L(Exit6): |
2491 | movl (%ecx), %eax |
2492 | movl %eax, (%edx) |
2493 | movw 4(%ecx), %ax |
2494 | movw %ax, 4(%edx) |
2495 | SAVE_RESULT (5) |
2496 | # ifdef USE_AS_STRNCPY |
2497 | sub $6, %ebx |
2498 | lea 6(%edx), %ecx |
2499 | jnz L(StrncpyFillTailWithZero1) |
2500 | # ifdef USE_AS_STPCPY |
2501 | cmpb $1, (%eax) |
2502 | sbb $-1, %eax |
2503 | # endif |
2504 | # endif |
2505 | RETURN1 |
2506 | |
/* Copy 7 bytes from (%ecx) to (%edx) using two 4-byte moves that
   overlap by one byte (offsets 0..3 and 3..6), then return through
   RETURN1 (which pops %edi).  %eax gets %edx + 6 in stpcpy builds and
   %edi otherwise.  strncpy builds subtract 7 from %ebx and, if count is
   left over, zero-fill from %edx + 7 via L(StrncpyFillTailWithZero1);
   the lea leaves the flags of the sub intact for the jnz.  stpncpy then
   adds one to %eax unless the byte it points at is NUL (cmpb sets CF
   only for a zero byte, and sbb $-1 adds 1 - CF).  */
2507 | .p2align 4 |
2508 | L(Exit7): |
2509 | movl (%ecx), %eax |
2510 | movl %eax, (%edx) |
2511 | movl 3(%ecx), %eax |
2512 | movl %eax, 3(%edx) |
2513 | SAVE_RESULT (6) |
2514 | # ifdef USE_AS_STRNCPY |
2515 | sub $7, %ebx |
2516 | lea 7(%edx), %ecx |
2517 | jnz L(StrncpyFillTailWithZero1) |
2518 | # ifdef USE_AS_STPCPY |
2519 | cmpb $1, (%eax) |
2520 | sbb $-1, %eax |
2521 | # endif |
2522 | # endif |
2523 | RETURN1 |
2524 | |
/* Copy 9 bytes (8 via %xmm0 plus 1 via %al) from (%ecx) to (%edx) and
   return through RETURN1 (which pops %edi).  %eax gets %edx + 8 in
   stpcpy builds and %edi otherwise.  strncpy builds subtract 9 from
   %ebx and, if count is left over, zero-fill from %edx + 9 via
   L(StrncpyFillTailWithZero1); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
2525 | .p2align 4 |
2526 | L(Exit9): |
2527 | movlpd (%ecx), %xmm0 |
2528 | movb 8(%ecx), %al |
2529 | movlpd %xmm0, (%edx) |
2530 | movb %al, 8(%edx) |
2531 | SAVE_RESULT (8) |
2532 | # ifdef USE_AS_STRNCPY |
2533 | sub $9, %ebx |
2534 | lea 9(%edx), %ecx |
2535 | jnz L(StrncpyFillTailWithZero1) |
2536 | # ifdef USE_AS_STPCPY |
2537 | cmpb $1, (%eax) |
2538 | sbb $-1, %eax |
2539 | # endif |
2540 | # endif |
2541 | RETURN1 |
2542 | |
/* Copy 10 bytes (8 via %xmm0 plus 2 via %ax) from (%ecx) to (%edx) and
   return through RETURN1 (which pops %edi).  %eax gets %edx + 9 in
   stpcpy builds and %edi otherwise.  strncpy builds subtract 10 from
   %ebx and, if count is left over, zero-fill from %edx + 10 via
   L(StrncpyFillTailWithZero1); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
2543 | .p2align 4 |
2544 | L(Exit10): |
2545 | movlpd (%ecx), %xmm0 |
2546 | movw 8(%ecx), %ax |
2547 | movlpd %xmm0, (%edx) |
2548 | movw %ax, 8(%edx) |
2549 | SAVE_RESULT (9) |
2550 | # ifdef USE_AS_STRNCPY |
2551 | sub $10, %ebx |
2552 | lea 10(%edx), %ecx |
2553 | jnz L(StrncpyFillTailWithZero1) |
2554 | # ifdef USE_AS_STPCPY |
2555 | cmpb $1, (%eax) |
2556 | sbb $-1, %eax |
2557 | # endif |
2558 | # endif |
2559 | RETURN1 |
2560 | |
/* Copy 11 bytes from (%ecx) to (%edx): an 8-byte move at offset 0 and
   a 4-byte move at offset 7 that overlaps it by one byte.  Returns
   through RETURN1 (which pops %edi).  %eax gets %edx + 10 in stpcpy
   builds and %edi otherwise.  strncpy builds subtract 11 from %ebx
   and, if count is left over, zero-fill from %edx + 11 via
   L(StrncpyFillTailWithZero1); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
2561 | .p2align 4 |
2562 | L(Exit11): |
2563 | movlpd (%ecx), %xmm0 |
2564 | movl 7(%ecx), %eax |
2565 | movlpd %xmm0, (%edx) |
2566 | movl %eax, 7(%edx) |
2567 | SAVE_RESULT (10) |
2568 | # ifdef USE_AS_STRNCPY |
2569 | sub $11, %ebx |
2570 | lea 11(%edx), %ecx |
2571 | jnz L(StrncpyFillTailWithZero1) |
2572 | # ifdef USE_AS_STPCPY |
2573 | cmpb $1, (%eax) |
2574 | sbb $-1, %eax |
2575 | # endif |
2576 | # endif |
2577 | RETURN1 |
2578 | |
/* Copy 13 bytes from (%ecx) to (%edx) with two 8-byte moves at offsets
   0 and 5 (overlapping by three bytes).  Returns through RETURN1
   (which pops %edi).  %eax gets %edx + 12 in stpcpy builds and %edi
   otherwise.  strncpy builds subtract 13 from %ebx and, if count is
   left over, zero-fill from %edx + 13 via
   L(StrncpyFillTailWithZero1); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
2579 | .p2align 4 |
2580 | L(Exit13): |
2581 | movlpd (%ecx), %xmm0 |
2582 | movlpd 5(%ecx), %xmm1 |
2583 | movlpd %xmm0, (%edx) |
2584 | movlpd %xmm1, 5(%edx) |
2585 | SAVE_RESULT (12) |
2586 | # ifdef USE_AS_STRNCPY |
2587 | sub $13, %ebx |
2588 | lea 13(%edx), %ecx |
2589 | jnz L(StrncpyFillTailWithZero1) |
2590 | # ifdef USE_AS_STPCPY |
2591 | cmpb $1, (%eax) |
2592 | sbb $-1, %eax |
2593 | # endif |
2594 | # endif |
2595 | RETURN1 |
2596 | |
/* Copy 14 bytes from (%ecx) to (%edx) with two 8-byte moves at offsets
   0 and 6 (overlapping by two bytes).  Returns through RETURN1 (which
   pops %edi).  %eax gets %edx + 13 in stpcpy builds and %edi
   otherwise.  strncpy builds subtract 14 from %ebx and, if count is
   left over, zero-fill from %edx + 14 via
   L(StrncpyFillTailWithZero1); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
2597 | .p2align 4 |
2598 | L(Exit14): |
2599 | movlpd (%ecx), %xmm0 |
2600 | movlpd 6(%ecx), %xmm1 |
2601 | movlpd %xmm0, (%edx) |
2602 | movlpd %xmm1, 6(%edx) |
2603 | SAVE_RESULT (13) |
2604 | # ifdef USE_AS_STRNCPY |
2605 | sub $14, %ebx |
2606 | lea 14(%edx), %ecx |
2607 | jnz L(StrncpyFillTailWithZero1) |
2608 | # ifdef USE_AS_STPCPY |
2609 | cmpb $1, (%eax) |
2610 | sbb $-1, %eax |
2611 | # endif |
2612 | # endif |
2613 | RETURN1 |
2614 | |
/* Copy 15 bytes from (%ecx) to (%edx) with two 8-byte moves at offsets
   0 and 7 (overlapping by one byte).  Returns through RETURN1 (which
   pops %edi).  %eax gets %edx + 14 in stpcpy builds and %edi
   otherwise.  strncpy builds subtract 15 from %ebx and, if count is
   left over, zero-fill from %edx + 15 via
   L(StrncpyFillTailWithZero1); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
2615 | .p2align 4 |
2616 | L(Exit15): |
2617 | movlpd (%ecx), %xmm0 |
2618 | movlpd 7(%ecx), %xmm1 |
2619 | movlpd %xmm0, (%edx) |
2620 | movlpd %xmm1, 7(%edx) |
2621 | SAVE_RESULT (14) |
2622 | # ifdef USE_AS_STRNCPY |
2623 | sub $15, %ebx |
2624 | lea 15(%edx), %ecx |
2625 | jnz L(StrncpyFillTailWithZero1) |
2626 | # ifdef USE_AS_STPCPY |
2627 | cmpb $1, (%eax) |
2628 | sbb $-1, %eax |
2629 | # endif |
2630 | # endif |
2631 | RETURN1 |
2632 | |
/* Unwind annotation only: CFI_POP shrinks the recorded CFA offset by 4
   and marks %edi as restored.  The code that follows returns through
   RETURN, which does not pop %edi.  */
2633 | CFI_POP (%edi) |
2634 | |
2635 | # ifdef USE_AS_STRNCPY |
/* L(Fill0)..L(Fill7): last step of the strncpy zero padding.  L(FillN)
   stores N zero bytes at (%ecx) and returns through RETURN; %eax is
   not touched here.  The zero value is taken from %edx (%dl/%dx),
   which L(StrncpyFillTailWithZero) clears before control reaches the
   dispatcher L(FillFrom1To16Bytes).  L(Fill7) uses two 4-byte stores
   that overlap by one byte.  */
2636 | .p2align 4 |
2637 | L(Fill0): |
2638 | RETURN |
2639 | |
2640 | .p2align 4 |
2641 | L(Fill1): |
2642 | movb %dl, (%ecx) |
2643 | RETURN |
2644 | |
2645 | .p2align 4 |
2646 | L(Fill2): |
2647 | movw %dx, (%ecx) |
2648 | RETURN |
2649 | |
2650 | .p2align 4 |
2651 | L(Fill3): |
2652 | movw %dx, (%ecx) |
2653 | movb %dl, 2(%ecx) |
2654 | RETURN |
2655 | |
2656 | .p2align 4 |
2657 | L(Fill4): |
2658 | movl %edx, (%ecx) |
2659 | RETURN |
2660 | |
2661 | .p2align 4 |
2662 | L(Fill5): |
2663 | movl %edx, (%ecx) |
2664 | movb %dl, 4(%ecx) |
2665 | RETURN |
2666 | |
2667 | .p2align 4 |
2668 | L(Fill6): |
2669 | movl %edx, (%ecx) |
2670 | movw %dx, 4(%ecx) |
2671 | RETURN |
2672 | |
2673 | .p2align 4 |
2674 | L(Fill7): |
2675 | movl %edx, (%ecx) |
2676 | movl %edx, 3(%ecx) |
2677 | RETURN |
2678 | |
/* L(Fill8)..L(Fill16): last step of the strncpy zero padding.
   L(FillN) stores N zero bytes at (%ecx) and returns through RETURN;
   %eax is not touched here.  The zeros come from %xmm0 and %edx, both
   cleared in L(StrncpyFillTailWithZero) before control reaches the
   dispatcher L(FillFrom1To16Bytes).  Sizes that are not 8, 12 or 16 use
   a second store that overlaps the first 8-byte store (for example
   offsets 0 and 5 for 13 bytes).  */
2679 | .p2align 4 |
2680 | L(Fill8): |
2681 | movlpd %xmm0, (%ecx) |
2682 | RETURN |
2683 | |
2684 | .p2align 4 |
2685 | L(Fill9): |
2686 | movlpd %xmm0, (%ecx) |
2687 | movb %dl, 8(%ecx) |
2688 | RETURN |
2689 | |
2690 | .p2align 4 |
2691 | L(Fill10): |
2692 | movlpd %xmm0, (%ecx) |
2693 | movw %dx, 8(%ecx) |
2694 | RETURN |
2695 | |
2696 | .p2align 4 |
2697 | L(Fill11): |
2698 | movlpd %xmm0, (%ecx) |
2699 | movl %edx, 7(%ecx) |
2700 | RETURN |
2701 | |
2702 | .p2align 4 |
2703 | L(Fill12): |
2704 | movlpd %xmm0, (%ecx) |
2705 | movl %edx, 8(%ecx) |
2706 | RETURN |
2707 | |
2708 | .p2align 4 |
2709 | L(Fill13): |
2710 | movlpd %xmm0, (%ecx) |
2711 | movlpd %xmm0, 5(%ecx) |
2712 | RETURN |
2713 | |
2714 | .p2align 4 |
2715 | L(Fill14): |
2716 | movlpd %xmm0, (%ecx) |
2717 | movlpd %xmm0, 6(%ecx) |
2718 | RETURN |
2719 | |
2720 | .p2align 4 |
2721 | L(Fill15): |
2722 | movlpd %xmm0, (%ecx) |
2723 | movlpd %xmm0, 7(%ecx) |
2724 | RETURN |
2725 | |
2726 | .p2align 4 |
2727 | L(Fill16): |
2728 | movlpd %xmm0, (%ecx) |
2729 | movlpd %xmm0, 8(%ecx) |
2730 | RETURN |
2731 | |
/* Dispatcher for the last 0..16 bytes of zero padding.
   L(StrncpyFillExit1) first adds 16 back to %ebx, undoing the
   "sub $16" (or the net -16 bias) applied by the code that jumps to
   it.  L(FillFrom1To16Bytes) then selects L(Fill<%ebx>) with a small
   comparison tree: 0, 16, 8 and 4 are tested directly, the remaining
   values are split into the ranges 1..3, 5..7, 9..11 and 13..15.
   The comparisons are signed (jg/jl).
   NOTE(review): the tree assumes 0 <= %ebx <= 16; a larger value would
   end up in L(Fill15) -- confirm that all callers guarantee the
   range.  */
2732 | .p2align 4 |
2733 | L(StrncpyFillExit1): |
2734 | lea 16(%ebx), %ebx |
2735 | L(FillFrom1To16Bytes): |
2736 | test %ebx, %ebx |
2737 | jz L(Fill0) |
2738 | cmp $16, %ebx |
2739 | je L(Fill16) |
2740 | cmp $8, %ebx |
2741 | je L(Fill8) |
2742 | jg L(FillMore8) |
2743 | cmp $4, %ebx |
2744 | je L(Fill4) |
2745 | jg L(FillMore4) |
/* Here %ebx is below 4 and not 0: pick among 1, 2 and 3.  */
2746 | cmp $2, %ebx |
2747 | jl L(Fill1) |
2748 | je L(Fill2) |
2749 | jg L(Fill3) |
2750 | L(FillMore8): /* but less than 16 */ |
2751 | cmp $12, %ebx |
2752 | je L(Fill12) |
2753 | jl L(FillLess12) |
2754 | cmp $14, %ebx |
2755 | jl L(Fill13) |
2756 | je L(Fill14) |
2757 | jg L(Fill15) |
2758 | L(FillMore4): /* but less than 8 */ |
2759 | cmp $6, %ebx |
2760 | jl L(Fill5) |
2761 | je L(Fill6) |
2762 | jg L(Fill7) |
2763 | L(FillLess12): /* but more than 8 */ |
2764 | cmp $10, %ebx |
2765 | jl L(Fill9) |
2766 | je L(Fill10) |
2767 | jmp L(Fill11) |
2768 | |
/* Zero-fill %ebx bytes starting at %ecx (strncpy padding).
   L(StrncpyFillTailWithZero1) is the entry used while %edi is still on
   the stack: it pops %edi and falls into L(StrncpyFillTailWithZero).
   The CFI_PUSH in front re-declares %edi as saved for that entry.
   On the way %xmm0 and %edx are cleared; the L(Fill*) stubs rely on
   that.  %eax is not modified.  */
2769 | CFI_PUSH(%edi) |
2770 | |
2771 | .p2align 4 |
2772 | L(StrncpyFillTailWithZero1): |
2773 | POP (%edi) |
2774 | L(StrncpyFillTailWithZero): |
2775 | pxor %xmm0, %xmm0 |
2776 | xor %edx, %edx |
/* 16 bytes or fewer left: let the dispatcher finish (it re-adds 16).  */
2777 | sub $16, %ebx |
2778 | jbe L(StrncpyFillExit1) |
2779 | |
/* More than 16 bytes: write the first 16 with stores that have no
   alignment requirement.  */
2780 | movlpd %xmm0, (%ecx) |
2781 | movlpd %xmm0, 8(%ecx) |
2782 | |
2783 | lea 16(%ecx), %ecx |
2784 | |
/* Round %ecx down to a 16-byte boundary so movdqa can be used, and add
   the bytes stepped back over to %ebx so they are counted (and zeroed)
   again.  %edx is used as scratch and cleared afterwards.  */
2785 | mov %ecx, %edx |
2786 | and $0xf, %edx |
2787 | sub %edx, %ecx |
2788 | add %edx, %ebx |
2789 | xor %edx, %edx |
2790 | sub $64, %ebx |
2791 | jb L(StrncpyFillLess64) |
2792 | |
/* Main loop: 64 zero bytes per iteration while the running count does
   not borrow.  */
2793 | L(StrncpyFillLoopMovdqa): |
2794 | movdqa %xmm0, (%ecx) |
2795 | movdqa %xmm0, 16(%ecx) |
2796 | movdqa %xmm0, 32(%ecx) |
2797 | movdqa %xmm0, 48(%ecx) |
2798 | lea 64(%ecx), %ecx |
2799 | sub $64, %ebx |
2800 | jae L(StrncpyFillLoopMovdqa) |
2801 | |
/* %ebx is now (bytes left - 64), i.e. negative.  Adding 32 tells
   whether at least 32 bytes remain.  */
2802 | L(StrncpyFillLess64): |
2803 | add $32, %ebx |
2804 | jl L(StrncpyFillLess32) |
2805 | movdqa %xmm0, (%ecx) |
2806 | movdqa %xmm0, 16(%ecx) |
2807 | lea 32(%ecx), %ecx |
2808 | sub $16, %ebx |
2809 | jl L(StrncpyFillExit1) |
2810 | movdqa %xmm0, (%ecx) |
2811 | lea 16(%ecx), %ecx |
2812 | jmp L(FillFrom1To16Bytes) |
2813 | |
/* Fewer than 32 bytes remain; %ebx is (bytes left - 32).  */
2814 | L(StrncpyFillLess32): |
2815 | add $16, %ebx |
2816 | jl L(StrncpyFillExit1) |
2817 | movdqa %xmm0, (%ecx) |
2818 | lea 16(%ecx), %ecx |
2819 | jmp L(FillFrom1To16Bytes) |
2820 | # endif |
2821 | |
/* Copy 1 byte from (%ecx) to (%edx) and return through RETURN (no
   %edi pop).  %eax gets %edx + 0 in stpcpy builds and %edx otherwise.
   strncpy builds subtract 1 from %ebx and, if count is left over,
   zero-fill from %edx + 1 via L(StrncpyFillTailWithZero); the lea
   leaves the flags of the sub intact for the jnz.  stpncpy then adds
   one to %eax unless the byte it points at is NUL (cmpb sets CF only
   for a zero byte, and sbb $-1 adds 1 - CF).  */
2822 | .p2align 4 |
2823 | L(ExitTail1): |
2824 | movb (%ecx), %al |
2825 | movb %al, (%edx) |
2826 | SAVE_RESULT_TAIL (0) |
2827 | # ifdef USE_AS_STRNCPY |
2828 | sub $1, %ebx |
2829 | lea 1(%edx), %ecx |
2830 | jnz L(StrncpyFillTailWithZero) |
2831 | # ifdef USE_AS_STPCPY |
2832 | cmpb $1, (%eax) |
2833 | sbb $-1, %eax |
2834 | # endif |
2835 | # endif |
2836 | RETURN |
2837 | |
/* Copy 2 bytes from (%ecx) to (%edx) and return through RETURN (no
   %edi pop).  %eax gets %edx + 1 in stpcpy builds and %edx otherwise.
   strncpy builds subtract 2 from %ebx and, if count is left over,
   zero-fill from %edx + 2 via L(StrncpyFillTailWithZero); the lea
   leaves the flags of the sub intact for the jnz.  stpncpy then adds
   one to %eax unless the byte it points at is NUL (cmpb sets CF only
   for a zero byte, and sbb $-1 adds 1 - CF).  */
2838 | .p2align 4 |
2839 | L(ExitTail2): |
2840 | movw (%ecx), %ax |
2841 | movw %ax, (%edx) |
2842 | SAVE_RESULT_TAIL (1) |
2843 | # ifdef USE_AS_STRNCPY |
2844 | sub $2, %ebx |
2845 | lea 2(%edx), %ecx |
2846 | jnz L(StrncpyFillTailWithZero) |
2847 | # ifdef USE_AS_STPCPY |
2848 | cmpb $1, (%eax) |
2849 | sbb $-1, %eax |
2850 | # endif |
2851 | # endif |
2852 | RETURN |
2853 | |
/* Copy 3 bytes (a 2-byte move followed by a 1-byte move) from (%ecx)
   to (%edx) and return through RETURN (no %edi pop).  %eax gets
   %edx + 2 in stpcpy builds and %edx otherwise.  strncpy builds
   subtract 3 from %ebx and, if count is left over, zero-fill from
   %edx + 3 via L(StrncpyFillTailWithZero); the lea leaves the flags of
   the sub intact for the jnz.  stpncpy then adds one to %eax unless the
   byte it points at is NUL (cmpb sets CF only for a zero byte, and
   sbb $-1 adds 1 - CF).  */
2854 | .p2align 4 |
2855 | L(ExitTail3): |
2856 | movw (%ecx), %ax |
2857 | movw %ax, (%edx) |
2858 | movb 2(%ecx), %al |
2859 | movb %al, 2(%edx) |
2860 | SAVE_RESULT_TAIL (2) |
2861 | # ifdef USE_AS_STRNCPY |
2862 | sub $3, %ebx |
2863 | lea 3(%edx), %ecx |
2864 | jnz L(StrncpyFillTailWithZero) |
2865 | # ifdef USE_AS_STPCPY |
2866 | cmpb $1, (%eax) |
2867 | sbb $-1, %eax |
2868 | # endif |
2869 | # endif |
2870 | RETURN |
2871 | |
/* Copy 4 bytes from (%ecx) to (%edx) and return through RETURN (no
   %edi pop).  %eax gets %edx + 3 in stpcpy builds and %edx otherwise.
   strncpy builds subtract 4 from %ebx and, if count is left over,
   zero-fill from %edx + 4 via L(StrncpyFillTailWithZero); the lea
   leaves the flags of the sub intact for the jnz.  stpncpy then adds
   one to %eax unless the byte it points at is NUL (cmpb sets CF only
   for a zero byte, and sbb $-1 adds 1 - CF).  */
2872 | .p2align 4 |
2873 | L(ExitTail4): |
2874 | movl (%ecx), %eax |
2875 | movl %eax, (%edx) |
2876 | SAVE_RESULT_TAIL (3) |
2877 | # ifdef USE_AS_STRNCPY |
2878 | sub $4, %ebx |
2879 | lea 4(%edx), %ecx |
2880 | jnz L(StrncpyFillTailWithZero) |
2881 | # ifdef USE_AS_STPCPY |
2882 | cmpb $1, (%eax) |
2883 | sbb $-1, %eax |
2884 | # endif |
2885 | # endif |
2886 | RETURN |
2887 | |
/* Copy 5 bytes (a 4-byte move followed by a 1-byte move) from (%ecx)
   to (%edx) and return through RETURN (no %edi pop).  %eax gets
   %edx + 4 in stpcpy builds and %edx otherwise.  strncpy builds
   subtract 5 from %ebx and, if count is left over, zero-fill from
   %edx + 5 via L(StrncpyFillTailWithZero); the lea leaves the flags of
   the sub intact for the jnz.  stpncpy then adds one to %eax unless the
   byte it points at is NUL (cmpb sets CF only for a zero byte, and
   sbb $-1 adds 1 - CF).  */
2888 | .p2align 4 |
2889 | L(ExitTail5): |
2890 | movl (%ecx), %eax |
2891 | movl %eax, (%edx) |
2892 | movb 4(%ecx), %al |
2893 | movb %al, 4(%edx) |
2894 | SAVE_RESULT_TAIL (4) |
2895 | # ifdef USE_AS_STRNCPY |
2896 | sub $5, %ebx |
2897 | lea 5(%edx), %ecx |
2898 | jnz L(StrncpyFillTailWithZero) |
2899 | # ifdef USE_AS_STPCPY |
2900 | cmpb $1, (%eax) |
2901 | sbb $-1, %eax |
2902 | # endif |
2903 | # endif |
2904 | RETURN |
2905 | |
/* Copy 6 bytes (a 4-byte move followed by a 2-byte move) from (%ecx)
   to (%edx) and return through RETURN (no %edi pop).  %eax gets
   %edx + 5 in stpcpy builds and %edx otherwise.  strncpy builds
   subtract 6 from %ebx and, if count is left over, zero-fill from
   %edx + 6 via L(StrncpyFillTailWithZero); the lea leaves the flags of
   the sub intact for the jnz.  stpncpy then adds one to %eax unless the
   byte it points at is NUL (cmpb sets CF only for a zero byte, and
   sbb $-1 adds 1 - CF).  */
2906 | .p2align 4 |
2907 | L(ExitTail6): |
2908 | movl (%ecx), %eax |
2909 | movl %eax, (%edx) |
2910 | movw 4(%ecx), %ax |
2911 | movw %ax, 4(%edx) |
2912 | SAVE_RESULT_TAIL (5) |
2913 | # ifdef USE_AS_STRNCPY |
2914 | sub $6, %ebx |
2915 | lea 6(%edx), %ecx |
2916 | jnz L(StrncpyFillTailWithZero) |
2917 | # ifdef USE_AS_STPCPY |
2918 | cmpb $1, (%eax) |
2919 | sbb $-1, %eax |
2920 | # endif |
2921 | # endif |
2922 | RETURN |
2923 | |
/* Copy 7 bytes from (%ecx) to (%edx) using two 4-byte moves that
   overlap by one byte (offsets 0..3 and 3..6), then return through
   RETURN (no %edi pop).  %eax gets %edx + 6 in stpcpy builds and %edx
   otherwise.  strncpy builds subtract 7 from %ebx and, if count is
   left over, zero-fill from %edx + 7 via L(StrncpyFillTailWithZero);
   the lea leaves the flags of the sub intact for the jnz.  stpncpy then
   adds one to %eax unless the byte it points at is NUL (cmpb sets CF
   only for a zero byte, and sbb $-1 adds 1 - CF).  */
2924 | .p2align 4 |
2925 | L(ExitTail7): |
2926 | movl (%ecx), %eax |
2927 | movl %eax, (%edx) |
2928 | movl 3(%ecx), %eax |
2929 | movl %eax, 3(%edx) |
2930 | SAVE_RESULT_TAIL (6) |
2931 | # ifdef USE_AS_STRNCPY |
2932 | sub $7, %ebx |
2933 | lea 7(%edx), %ecx |
2934 | jnz L(StrncpyFillTailWithZero) |
2935 | # ifdef USE_AS_STPCPY |
2936 | cmpb $1, (%eax) |
2937 | sbb $-1, %eax |
2938 | # endif |
2939 | # endif |
2940 | RETURN |
2941 | |
/* Copy 8 bytes from (%ecx) to (%edx) and return through RETURN (no
   %edi pop).  %eax gets %edx + 7 in stpcpy builds and %edx otherwise.
   strncpy builds subtract 8 from %ebx and, if count is left over,
   zero-fill from %edx + 8 via L(StrncpyFillTailWithZero); the lea
   leaves the flags of the sub intact for the jnz.
   NOTE(review): unlike most sibling L(ExitTailN) blocks there is no
   stpncpy cmpb/sbb adjustment of %eax here; presumably this label is
   only reached when the 8th byte is the NUL terminator -- confirm
   against the code that branches here.  */
2942 | .p2align 4 |
2943 | L(ExitTail8): |
2944 | movlpd (%ecx), %xmm0 |
2945 | movlpd %xmm0, (%edx) |
2946 | SAVE_RESULT_TAIL (7) |
2947 | # ifdef USE_AS_STRNCPY |
2948 | sub $8, %ebx |
2949 | lea 8(%edx), %ecx |
2950 | jnz L(StrncpyFillTailWithZero) |
2951 | # endif |
2952 | RETURN |
2953 | |
/* Copy 9 bytes (8 via %xmm0 plus 1 via %al) from (%ecx) to (%edx) and
   return through RETURN (no %edi pop).  %eax gets %edx + 8 in stpcpy
   builds and %edx otherwise.  strncpy builds subtract 9 from %ebx and,
   if count is left over, zero-fill from %edx + 9 via
   L(StrncpyFillTailWithZero); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
2954 | .p2align 4 |
2955 | L(ExitTail9): |
2956 | movlpd (%ecx), %xmm0 |
2957 | movb 8(%ecx), %al |
2958 | movlpd %xmm0, (%edx) |
2959 | movb %al, 8(%edx) |
2960 | SAVE_RESULT_TAIL (8) |
2961 | # ifdef USE_AS_STRNCPY |
2962 | sub $9, %ebx |
2963 | lea 9(%edx), %ecx |
2964 | jnz L(StrncpyFillTailWithZero) |
2965 | # ifdef USE_AS_STPCPY |
2966 | cmpb $1, (%eax) |
2967 | sbb $-1, %eax |
2968 | # endif |
2969 | # endif |
2970 | RETURN |
2971 | |
/* Copy 10 bytes (8 via %xmm0 plus 2 via %ax) from (%ecx) to (%edx) and
   return through RETURN (no %edi pop).  %eax gets %edx + 9 in stpcpy
   builds and %edx otherwise.  strncpy builds subtract 10 from %ebx
   and, if count is left over, zero-fill from %edx + 10 via
   L(StrncpyFillTailWithZero); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
2972 | .p2align 4 |
2973 | L(ExitTail10): |
2974 | movlpd (%ecx), %xmm0 |
2975 | movw 8(%ecx), %ax |
2976 | movlpd %xmm0, (%edx) |
2977 | movw %ax, 8(%edx) |
2978 | SAVE_RESULT_TAIL (9) |
2979 | # ifdef USE_AS_STRNCPY |
2980 | sub $10, %ebx |
2981 | lea 10(%edx), %ecx |
2982 | jnz L(StrncpyFillTailWithZero) |
2983 | # ifdef USE_AS_STPCPY |
2984 | cmpb $1, (%eax) |
2985 | sbb $-1, %eax |
2986 | # endif |
2987 | # endif |
2988 | RETURN |
2989 | |
/* Copy 11 bytes from (%ecx) to (%edx): an 8-byte move at offset 0 and
   a 4-byte move at offset 7 that overlaps it by one byte.  Returns
   through RETURN (no %edi pop).  %eax gets %edx + 10 in stpcpy builds
   and %edx otherwise.  strncpy builds subtract 11 from %ebx and, if
   count is left over, zero-fill from %edx + 11 via
   L(StrncpyFillTailWithZero); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
2990 | .p2align 4 |
2991 | L(ExitTail11): |
2992 | movlpd (%ecx), %xmm0 |
2993 | movl 7(%ecx), %eax |
2994 | movlpd %xmm0, (%edx) |
2995 | movl %eax, 7(%edx) |
2996 | SAVE_RESULT_TAIL (10) |
2997 | # ifdef USE_AS_STRNCPY |
2998 | sub $11, %ebx |
2999 | lea 11(%edx), %ecx |
3000 | jnz L(StrncpyFillTailWithZero) |
3001 | # ifdef USE_AS_STPCPY |
3002 | cmpb $1, (%eax) |
3003 | sbb $-1, %eax |
3004 | # endif |
3005 | # endif |
3006 | RETURN |
3007 | |
/* Copy 12 bytes (8 via %xmm0 plus 4 via %eax) from (%ecx) to (%edx)
   and return through RETURN (no %edi pop).  %eax gets %edx + 11 in
   stpcpy builds and %edx otherwise.  strncpy builds subtract 12 from
   %ebx and, if count is left over, zero-fill from %edx + 12 via
   L(StrncpyFillTailWithZero); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
3008 | .p2align 4 |
3009 | L(ExitTail12): |
3010 | movlpd (%ecx), %xmm0 |
3011 | movl 8(%ecx), %eax |
3012 | movlpd %xmm0, (%edx) |
3013 | movl %eax, 8(%edx) |
3014 | SAVE_RESULT_TAIL (11) |
3015 | # ifdef USE_AS_STRNCPY |
3016 | sub $12, %ebx |
3017 | lea 12(%edx), %ecx |
3018 | jnz L(StrncpyFillTailWithZero) |
3019 | # ifdef USE_AS_STPCPY |
3020 | cmpb $1, (%eax) |
3021 | sbb $-1, %eax |
3022 | # endif |
3023 | # endif |
3024 | RETURN |
3025 | |
/* Copy 13 bytes from (%ecx) to (%edx) with two 8-byte moves at offsets
   0 and 5 (overlapping by three bytes).  Returns through RETURN (no
   %edi pop).  %eax gets %edx + 12 in stpcpy builds and %edx otherwise.
   strncpy builds subtract 13 from %ebx and, if count is left over,
   zero-fill from %edx + 13 via L(StrncpyFillTailWithZero); the lea
   leaves the flags of the sub intact for the jnz.  stpncpy then adds
   one to %eax unless the byte it points at is NUL (cmpb sets CF only
   for a zero byte, and sbb $-1 adds 1 - CF).  */
3026 | .p2align 4 |
3027 | L(ExitTail13): |
3028 | movlpd (%ecx), %xmm0 |
3029 | movlpd 5(%ecx), %xmm1 |
3030 | movlpd %xmm0, (%edx) |
3031 | movlpd %xmm1, 5(%edx) |
3032 | SAVE_RESULT_TAIL (12) |
3033 | # ifdef USE_AS_STRNCPY |
3034 | sub $13, %ebx |
3035 | lea 13(%edx), %ecx |
3036 | jnz L(StrncpyFillTailWithZero) |
3037 | # ifdef USE_AS_STPCPY |
3038 | cmpb $1, (%eax) |
3039 | sbb $-1, %eax |
3040 | # endif |
3041 | # endif |
3042 | RETURN |
3043 | |
/* Copy 14 bytes from (%ecx) to (%edx) with two 8-byte moves at offsets
   0 and 6 (overlapping by two bytes).  Returns through RETURN (no
   %edi pop).  %eax gets %edx + 13 in stpcpy builds and %edx otherwise.
   strncpy builds subtract 14 from %ebx and, if count is left over,
   zero-fill from %edx + 14 via L(StrncpyFillTailWithZero); the lea
   leaves the flags of the sub intact for the jnz.  stpncpy then adds
   one to %eax unless the byte it points at is NUL (cmpb sets CF only
   for a zero byte, and sbb $-1 adds 1 - CF).  */
3044 | .p2align 4 |
3045 | L(ExitTail14): |
3046 | movlpd (%ecx), %xmm0 |
3047 | movlpd 6(%ecx), %xmm1 |
3048 | movlpd %xmm0, (%edx) |
3049 | movlpd %xmm1, 6(%edx) |
3050 | SAVE_RESULT_TAIL (13) |
3051 | # ifdef USE_AS_STRNCPY |
3052 | sub $14, %ebx |
3053 | lea 14(%edx), %ecx |
3054 | jnz L(StrncpyFillTailWithZero) |
3055 | # ifdef USE_AS_STPCPY |
3056 | cmpb $1, (%eax) |
3057 | sbb $-1, %eax |
3058 | # endif |
3059 | # endif |
3060 | RETURN |
3061 | |
/* Copy 15 bytes from (%ecx) to (%edx) with two 8-byte moves at offsets
   0 and 7 (overlapping by one byte).  Returns through RETURN (no %edi
   pop).  %eax gets %edx + 14 in stpcpy builds and %edx otherwise.
   strncpy builds subtract 15 from %ebx and, if count is left over,
   zero-fill from %edx + 15 via L(StrncpyFillTailWithZero); the lea
   leaves the flags of the sub intact for the jnz.
   NOTE(review): unlike most sibling L(ExitTailN) blocks there is no
   stpncpy cmpb/sbb adjustment of %eax here; presumably this label is
   only reached when the 15th byte is the NUL terminator -- confirm
   against the code that branches here.  */
3062 | .p2align 4 |
3063 | L(ExitTail15): |
3064 | movlpd (%ecx), %xmm0 |
3065 | movlpd 7(%ecx), %xmm1 |
3066 | movlpd %xmm0, (%edx) |
3067 | movlpd %xmm1, 7(%edx) |
3068 | SAVE_RESULT_TAIL (14) |
3069 | # ifdef USE_AS_STRNCPY |
3070 | sub $15, %ebx |
3071 | lea 15(%edx), %ecx |
3072 | jnz L(StrncpyFillTailWithZero) |
3073 | # endif |
3074 | RETURN |
3075 | |
/* Copy 16 bytes from (%ecx) to (%edx) with one unaligned 16-byte move
   and return through RETURN (no %edi pop).  %eax gets %edx + 15 in
   stpcpy builds and %edx otherwise.  strncpy builds subtract 16 from
   %ebx and, if count is left over, zero-fill from %edx + 16 via
   L(StrncpyFillTailWithZero); the lea leaves the flags of the sub
   intact for the jnz.  stpncpy then adds one to %eax unless the byte it
   points at is NUL (cmpb sets CF only for a zero byte, and sbb $-1 adds
   1 - CF).  */
3076 | .p2align 4 |
3077 | L(ExitTail16): |
3078 | movdqu (%ecx), %xmm0 |
3079 | movdqu %xmm0, (%edx) |
3080 | SAVE_RESULT_TAIL (15) |
3081 | # ifdef USE_AS_STRNCPY |
3082 | sub $16, %ebx |
3083 | lea 16(%edx), %ecx |
3084 | jnz L(StrncpyFillTailWithZero) |
3085 | # ifdef USE_AS_STPCPY |
3086 | cmpb $1, (%eax) |
3087 | sbb $-1, %eax |
3088 | # endif |
3089 | # endif |
3090 | RETURN |
3091 | # endif |
3092 | |
3093 | # ifdef USE_AS_STRNCPY |
3094 | # ifndef USE_AS_STRCAT |
/* Unwind annotations only: each CFI_PUSH grows the recorded CFA offset
   by 4 and records where the register is saved.  They declare %esi and
   %edi as being on the stack for the code that follows.  */
3095 | CFI_PUSH (%esi) |
3096 | CFI_PUSH (%edi) |
3097 | # endif |
/* Leave path that selects on %eax: non-zero goes to
   L(Aligned64LeaveCase2), zero falls into L(Aligned64LeaveCase3).
   NOTE(review): %eax is presumably a pmovmskb NUL-byte mask computed by
   the caller -- confirm.

   L(Aligned64LeaveCase3) adds 48 back to %ebx and, while count remains,
   stores %xmm4, %xmm5 and %xmm6 at -64, -48 and -32 bytes from %edx,
   advancing %esi by 16 and reducing %ebx by 16 per store.  Every way
   out continues at L(CopyFrom1To16BytesCase3).  */
3098 | .p2align 4 |
3099 | L(StrncpyLeaveCase2OrCase3): |
3100 | test %eax, %eax |
3101 | jnz L(Aligned64LeaveCase2) |
3102 | |
3103 | L(Aligned64LeaveCase3): |
3104 | add $48, %ebx |
3105 | jle L(CopyFrom1To16BytesCase3) |
3106 | movaps %xmm4, -64(%edx) |
3107 | lea 16(%esi), %esi |
3108 | sub $16, %ebx |
3109 | jbe L(CopyFrom1To16BytesCase3) |
3110 | movaps %xmm5, -48(%edx) |
3111 | lea 16(%esi), %esi |
3112 | sub $16, %ebx |
3113 | jbe L(CopyFrom1To16BytesCase3) |
3114 | movaps %xmm6, -32(%edx) |
3115 | lea 16(%esi), %esi |
/* lea instead of sub: no flags are needed before the unconditional
   jump.  */
3116 | lea -16(%ebx), %ebx |
3117 | jmp L(CopyFrom1To16BytesCase3) |
3118 | |
/* Walk %xmm4..%xmm7 one 16-byte block at a time.  For each block a
   byte mask is built in %eax with pcmpeqb against %xmm0 followed by
   pmovmskb; then %ebx decides whether the count is exhausted (jump to
   L(CopyFrom1To16BytesCase2OrCase3)) and a non-zero mask ends the walk
   at L(CopyFrom1To16Bytes).  Before moving on, the previous block is
   stored at -64/-48/-32 bytes from %edx and %esi advances by 16.  The
   last block (%xmm7) always continues at L(CopyFrom1To16BytesCase2).
   NOTE(review): the compare finds NUL bytes only if %xmm0 is all zero
   on entry -- presumably guaranteed by the caller, confirm.  After a
   zero mask %xmm0 is all zero again, which the next pcmpeqb relies
   on.  */
3119 | L(Aligned64LeaveCase2): |
3120 | pcmpeqb %xmm4, %xmm0 |
3121 | pmovmskb %xmm0, %eax |
3122 | add $48, %ebx |
3123 | jle L(CopyFrom1To16BytesCase2OrCase3) |
3124 | test %eax, %eax |
3125 | jnz L(CopyFrom1To16Bytes) |
3126 | |
3127 | pcmpeqb %xmm5, %xmm0 |
3128 | pmovmskb %xmm0, %eax |
3129 | movaps %xmm4, -64(%edx) |
3130 | lea 16(%esi), %esi |
3131 | sub $16, %ebx |
3132 | jbe L(CopyFrom1To16BytesCase2OrCase3) |
3133 | test %eax, %eax |
3134 | jnz L(CopyFrom1To16Bytes) |
3135 | |
3136 | pcmpeqb %xmm6, %xmm0 |
3137 | pmovmskb %xmm0, %eax |
3138 | movaps %xmm5, -48(%edx) |
3139 | lea 16(%esi), %esi |
3140 | sub $16, %ebx |
3141 | jbe L(CopyFrom1To16BytesCase2OrCase3) |
3142 | test %eax, %eax |
3143 | jnz L(CopyFrom1To16Bytes) |
3144 | |
3145 | pcmpeqb %xmm7, %xmm0 |
3146 | pmovmskb %xmm0, %eax |
3147 | movaps %xmm6, -32(%edx) |
3148 | lea 16(%esi), %esi |
3149 | lea -16(%ebx), %ebx |
3150 | jmp L(CopyFrom1To16BytesCase2) |
3151 | |
3152 | /*--------------------------------------------------*/ |
/* Copy the 15 bytes at offsets 0..14 from %ecx to %edx (two 8-byte
   moves overlapping by one byte), set %esi to 15, then continue at
   L(CopyFrom1To16BytesCase2) if %eax is non-zero and at
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably the NUL-byte mask computed by the
   caller -- confirm.  */
3153 | .p2align 4 |
3154 | L(StrncpyExit1Case2OrCase3): |
3155 | movlpd (%ecx), %xmm0 |
3156 | movlpd 7(%ecx), %xmm1 |
3157 | movlpd %xmm0, (%edx) |
3158 | movlpd %xmm1, 7(%edx) |
3159 | mov $15, %esi |
3160 | test %eax, %eax |
3161 | jnz L(CopyFrom1To16BytesCase2) |
3162 | jmp L(CopyFrom1To16BytesCase3) |
3163 | |
/* Copy the 14 bytes at offsets 0..13 from %ecx to %edx (two 8-byte
   moves overlapping by two bytes), set %esi to 14, then continue at
   L(CopyFrom1To16BytesCase2) if %eax is non-zero and at
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably the NUL-byte mask computed by the
   caller -- confirm.  */
3164 | .p2align 4 |
3165 | L(StrncpyExit2Case2OrCase3): |
3166 | movlpd (%ecx), %xmm0 |
3167 | movlpd 6(%ecx), %xmm1 |
3168 | movlpd %xmm0, (%edx) |
3169 | movlpd %xmm1, 6(%edx) |
3170 | mov $14, %esi |
3171 | test %eax, %eax |
3172 | jnz L(CopyFrom1To16BytesCase2) |
3173 | jmp L(CopyFrom1To16BytesCase3) |
3174 | |
/* Copy the 13 bytes at offsets 0..12 from %ecx to %edx (two 8-byte
   moves overlapping by three bytes), set %esi to 13, then continue at
   L(CopyFrom1To16BytesCase2) if %eax is non-zero and at
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably the NUL-byte mask computed by the
   caller -- confirm.  */
3175 | .p2align 4 |
3176 | L(StrncpyExit3Case2OrCase3): |
3177 | movlpd (%ecx), %xmm0 |
3178 | movlpd 5(%ecx), %xmm1 |
3179 | movlpd %xmm0, (%edx) |
3180 | movlpd %xmm1, 5(%edx) |
3181 | mov $13, %esi |
3182 | test %eax, %eax |
3183 | jnz L(CopyFrom1To16BytesCase2) |
3184 | jmp L(CopyFrom1To16BytesCase3) |
3185 | |
/* Copy the 12 bytes at offsets 0..11 from %ecx to %edx (8 via %xmm0,
   4 via %esi as scratch), then set %esi to 12 and continue at
   L(CopyFrom1To16BytesCase2) if %eax is non-zero and at
   L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably the NUL-byte mask computed by the
   caller -- confirm.  */
3186 | .p2align 4 |
3187 | L(StrncpyExit4Case2OrCase3): |
3188 | movlpd (%ecx), %xmm0 |
3189 | movl 8(%ecx), %esi |
3190 | movlpd %xmm0, (%edx) |
3191 | movl %esi, 8(%edx) |
3192 | mov $12, %esi |
3193 | test %eax, %eax |
3194 | jnz L(CopyFrom1To16BytesCase2) |
3195 | jmp L(CopyFrom1To16BytesCase3) |
3196 | |
/* Copy the 11 bytes at offsets 0..10 from %ecx to %edx (8 via %xmm0,
   then 4 at offset 7 via %esi as scratch, overlapping by one byte),
   then set %esi to 11 and continue at L(CopyFrom1To16BytesCase2) if
   %eax is non-zero and at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably the NUL-byte mask computed by the
   caller -- confirm.  */
3197 | .p2align 4 |
3198 | L(StrncpyExit5Case2OrCase3): |
3199 | movlpd (%ecx), %xmm0 |
3200 | movl 7(%ecx), %esi |
3201 | movlpd %xmm0, (%edx) |
3202 | movl %esi, 7(%edx) |
3203 | mov $11, %esi |
3204 | test %eax, %eax |
3205 | jnz L(CopyFrom1To16BytesCase2) |
3206 | jmp L(CopyFrom1To16BytesCase3) |
3207 | |
/* Copy the 10 bytes at offsets 0..9 from %ecx to %edx (8 via %xmm0,
   then 4 at offset 6 via %esi as scratch, overlapping by two bytes),
   then set %esi to 10 and continue at L(CopyFrom1To16BytesCase2) if
   %eax is non-zero and at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably the NUL-byte mask computed by the
   caller -- confirm.  */
3208 | .p2align 4 |
3209 | L(StrncpyExit6Case2OrCase3): |
3210 | movlpd (%ecx), %xmm0 |
3211 | movl 6(%ecx), %esi |
3212 | movlpd %xmm0, (%edx) |
3213 | movl %esi, 6(%edx) |
3214 | mov $10, %esi |
3215 | test %eax, %eax |
3216 | jnz L(CopyFrom1To16BytesCase2) |
3217 | jmp L(CopyFrom1To16BytesCase3) |
3218 | |
/* Copy the 9 bytes at offsets 0..8 from %ecx to %edx (8 via %xmm0,
   then 4 at offset 5 via %esi as scratch, overlapping by three bytes),
   then set %esi to 9 and continue at L(CopyFrom1To16BytesCase2) if
   %eax is non-zero and at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably the NUL-byte mask computed by the
   caller -- confirm.  */
3219 | .p2align 4 |
3220 | L(StrncpyExit7Case2OrCase3): |
3221 | movlpd (%ecx), %xmm0 |
3222 | movl 5(%ecx), %esi |
3223 | movlpd %xmm0, (%edx) |
3224 | movl %esi, 5(%edx) |
3225 | mov $9, %esi |
3226 | test %eax, %eax |
3227 | jnz L(CopyFrom1To16BytesCase2) |
3228 | jmp L(CopyFrom1To16BytesCase3) |
3229 | |
/* Copy the 8 bytes at offsets 0..7 from %ecx to %edx, set %esi to 8,
   then continue at L(CopyFrom1To16BytesCase2) if %eax is non-zero and
   at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably the NUL-byte mask computed by the
   caller -- confirm.  */
3230 | .p2align 4 |
3231 | L(StrncpyExit8Case2OrCase3): |
3232 | movlpd (%ecx), %xmm0 |
3233 | movlpd %xmm0, (%edx) |
3234 | mov $8, %esi |
3235 | test %eax, %eax |
3236 | jnz L(CopyFrom1To16BytesCase2) |
3237 | jmp L(CopyFrom1To16BytesCase3) |
3238 | |
/* Copy the 8 bytes at offsets 0..7 from %ecx to %edx, set %esi to 7,
   then continue at L(CopyFrom1To16BytesCase2) if %eax is non-zero and
   at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): the move covers one byte more (offset 7) than the
   offset placed in %esi; presumably that byte is rewritten or
   harmless in the continuation -- confirm against
   L(CopyFrom1To16BytesCase2)/L(CopyFrom1To16BytesCase3) and the
   caller's count handling.  %eax is presumably the NUL-byte mask
   computed by the caller.  */
3239 | .p2align 4 |
3240 | L(StrncpyExit9Case2OrCase3): |
3241 | movlpd (%ecx), %xmm0 |
3242 | movlpd %xmm0, (%edx) |
3243 | mov $7, %esi |
3244 | test %eax, %eax |
3245 | jnz L(CopyFrom1To16BytesCase2) |
3246 | jmp L(CopyFrom1To16BytesCase3) |
3247 | |
/* Copy the 8 bytes at offsets -1..6 from %ecx to %edx, set %esi to 6,
   then continue at L(CopyFrom1To16BytesCase2) if %eax is non-zero and
   at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): the move starts one byte before %ecx/%edx (presumably
   already copied) and also covers offset 6, one byte beyond the offset
   placed in %esi -- confirm both against the caller and the
   continuation.  %eax is presumably the NUL-byte mask computed by the
   caller.  */
3248 | .p2align 4 |
3249 | L(StrncpyExit10Case2OrCase3): |
3250 | movlpd -1(%ecx), %xmm0 |
3251 | movlpd %xmm0, -1(%edx) |
3252 | mov $6, %esi |
3253 | test %eax, %eax |
3254 | jnz L(CopyFrom1To16BytesCase2) |
3255 | jmp L(CopyFrom1To16BytesCase3) |
3256 | |
/* Copy the 8 bytes at offsets -2..5 from %ecx to %edx, set %esi to 5,
   then continue at L(CopyFrom1To16BytesCase2) if %eax is non-zero and
   at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): the move starts two bytes before %ecx/%edx (presumably
   already copied) and also covers offset 5, one byte beyond the offset
   placed in %esi -- confirm both against the caller and the
   continuation.  %eax is presumably the NUL-byte mask computed by the
   caller.  */
3257 | .p2align 4 |
3258 | L(StrncpyExit11Case2OrCase3): |
3259 | movlpd -2(%ecx), %xmm0 |
3260 | movlpd %xmm0, -2(%edx) |
3261 | mov $5, %esi |
3262 | test %eax, %eax |
3263 | jnz L(CopyFrom1To16BytesCase2) |
3264 | jmp L(CopyFrom1To16BytesCase3) |
3265 | |
/* Copy the 4 bytes at offsets 0..3 from %ecx to %edx (via %esi as
   scratch), set %esi to 4, then continue at L(CopyFrom1To16BytesCase2)
   if %eax is non-zero and at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): %eax is presumably the NUL-byte mask computed by the
   caller -- confirm.  */
3266 | .p2align 4 |
3267 | L(StrncpyExit12Case2OrCase3): |
3268 | movl (%ecx), %esi |
3269 | movl %esi, (%edx) |
3270 | mov $4, %esi |
3271 | test %eax, %eax |
3272 | jnz L(CopyFrom1To16BytesCase2) |
3273 | jmp L(CopyFrom1To16BytesCase3) |
3274 | |
/* Copy the 4 bytes at offsets -1..2 from %ecx to %edx (via %esi as
   scratch), set %esi to 3, then continue at L(CopyFrom1To16BytesCase2)
   if %eax is non-zero and at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): the byte at offset -1 is presumably one that was
   already copied, and %eax presumably the caller's NUL-byte mask --
   confirm.  */
3275 | .p2align 4 |
3276 | L(StrncpyExit13Case2OrCase3): |
3277 | movl -1(%ecx), %esi |
3278 | movl %esi, -1(%edx) |
3279 | mov $3, %esi |
3280 | test %eax, %eax |
3281 | jnz L(CopyFrom1To16BytesCase2) |
3282 | jmp L(CopyFrom1To16BytesCase3) |
3283 | |
/* Copy the 4 bytes at offsets -2..1 from %ecx to %edx (via %esi as
   scratch), set %esi to 2, then continue at L(CopyFrom1To16BytesCase2)
   if %eax is non-zero and at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): the bytes at offsets -2 and -1 are presumably ones
   that were already copied, and %eax presumably the caller's NUL-byte
   mask -- confirm.  */
3284 | .p2align 4 |
3285 | L(StrncpyExit14Case2OrCase3): |
3286 | movl -2(%ecx), %esi |
3287 | movl %esi, -2(%edx) |
3288 | mov $2, %esi |
3289 | test %eax, %eax |
3290 | jnz L(CopyFrom1To16BytesCase2) |
3291 | jmp L(CopyFrom1To16BytesCase3) |
3292 | |
/* Copy the 4 bytes at offsets -3..0 from %ecx to %edx (via %esi as
   scratch), set %esi to 1, then continue at L(CopyFrom1To16BytesCase2)
   if %eax is non-zero and at L(CopyFrom1To16BytesCase3) otherwise.
   NOTE(review): the bytes at offsets -3..-1 are presumably ones that
   were already copied, and %eax presumably the caller's NUL-byte mask
   -- confirm.  */
3293 | .p2align 4 |
3294 | L(StrncpyExit15Case2OrCase3): |
3295 | movl -3(%ecx), %esi |
3296 | movl %esi, -3(%edx) |
3297 | mov $1, %esi |
3298 | test %eax, %eax |
3299 | jnz L(CopyFrom1To16BytesCase2) |
3300 | jmp L(CopyFrom1To16BytesCase3) |
3301 | |
/* strncpy leave path for the palignr shift of 1.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $1 (the second after an
   aligned reload from 31(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit1) then advances %edx and %ecx by %esi + 15, copies
   the 16 bytes that end at the new %ecx, clears %esi and continues at
   L(CopyFrom1To16BytesCase3).  */
3302 | L(StrncpyLeave1): |
3303 | movaps %xmm2, %xmm3 |
3304 | add $48, %ebx |
3305 | jle L(StrncpyExit1) |
3306 | palignr $1, %xmm1, %xmm2 |
3307 | movaps %xmm2, (%edx) |
3308 | movaps 31(%ecx), %xmm2 |
3309 | lea 16(%esi), %esi |
3310 | sub $16, %ebx |
3311 | jbe L(StrncpyExit1) |
3312 | palignr $1, %xmm3, %xmm2 |
3313 | movaps %xmm2, 16(%edx) |
3314 | lea 16(%esi), %esi |
3315 | sub $16, %ebx |
3316 | jbe L(StrncpyExit1) |
3317 | movaps %xmm4, 32(%edx) |
3318 | lea 16(%esi), %esi |
3319 | sub $16, %ebx |
3320 | jbe L(StrncpyExit1) |
3321 | movaps %xmm5, 48(%edx) |
3322 | lea 16(%esi), %esi |
3323 | lea -16(%ebx), %ebx |
3324 | L(StrncpyExit1): |
3325 | lea 15(%edx, %esi), %edx |
3326 | lea 15(%ecx, %esi), %ecx |
3327 | movdqu -16(%ecx), %xmm0 |
3328 | xor %esi, %esi |
3329 | movdqu %xmm0, -16(%edx) |
3330 | jmp L(CopyFrom1To16BytesCase3) |
3331 | |
/* strncpy leave path for the palignr shift of 2.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $2 (the second after an
   aligned reload from 30(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit2) then advances %edx and %ecx by %esi + 14, copies
   the 16 bytes that end at the new %ecx, clears %esi and continues at
   L(CopyFrom1To16BytesCase3).  */
3332 | L(StrncpyLeave2): |
3333 | movaps %xmm2, %xmm3 |
3334 | add $48, %ebx |
3335 | jle L(StrncpyExit2) |
3336 | palignr $2, %xmm1, %xmm2 |
3337 | movaps %xmm2, (%edx) |
3338 | movaps 30(%ecx), %xmm2 |
3339 | lea 16(%esi), %esi |
3340 | sub $16, %ebx |
3341 | jbe L(StrncpyExit2) |
3342 | palignr $2, %xmm3, %xmm2 |
3343 | movaps %xmm2, 16(%edx) |
3344 | lea 16(%esi), %esi |
3345 | sub $16, %ebx |
3346 | jbe L(StrncpyExit2) |
3347 | movaps %xmm4, 32(%edx) |
3348 | lea 16(%esi), %esi |
3349 | sub $16, %ebx |
3350 | jbe L(StrncpyExit2) |
3351 | movaps %xmm5, 48(%edx) |
3352 | lea 16(%esi), %esi |
3353 | lea -16(%ebx), %ebx |
3354 | L(StrncpyExit2): |
3355 | lea 14(%edx, %esi), %edx |
3356 | lea 14(%ecx, %esi), %ecx |
3357 | movdqu -16(%ecx), %xmm0 |
3358 | xor %esi, %esi |
3359 | movdqu %xmm0, -16(%edx) |
3360 | jmp L(CopyFrom1To16BytesCase3) |
3361 | |
/* strncpy leave path for the palignr shift of 3.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $3 (the second after an
   aligned reload from 29(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit3) then advances %edx and %ecx by %esi + 13, copies
   the 16 bytes that end at the new %ecx, clears %esi and continues at
   L(CopyFrom1To16BytesCase3).  */
3362 | L(StrncpyLeave3): |
3363 | movaps %xmm2, %xmm3 |
3364 | add $48, %ebx |
3365 | jle L(StrncpyExit3) |
3366 | palignr $3, %xmm1, %xmm2 |
3367 | movaps %xmm2, (%edx) |
3368 | movaps 29(%ecx), %xmm2 |
3369 | lea 16(%esi), %esi |
3370 | sub $16, %ebx |
3371 | jbe L(StrncpyExit3) |
3372 | palignr $3, %xmm3, %xmm2 |
3373 | movaps %xmm2, 16(%edx) |
3374 | lea 16(%esi), %esi |
3375 | sub $16, %ebx |
3376 | jbe L(StrncpyExit3) |
3377 | movaps %xmm4, 32(%edx) |
3378 | lea 16(%esi), %esi |
3379 | sub $16, %ebx |
3380 | jbe L(StrncpyExit3) |
3381 | movaps %xmm5, 48(%edx) |
3382 | lea 16(%esi), %esi |
3383 | lea -16(%ebx), %ebx |
3384 | L(StrncpyExit3): |
3385 | lea 13(%edx, %esi), %edx |
3386 | lea 13(%ecx, %esi), %ecx |
3387 | movdqu -16(%ecx), %xmm0 |
3388 | xor %esi, %esi |
3389 | movdqu %xmm0, -16(%edx) |
3390 | jmp L(CopyFrom1To16BytesCase3) |
3391 | |
/* strncpy leave path for the palignr shift of 4.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $4 (the second after an
   aligned reload from 28(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit4) then advances %edx and %ecx by %esi + 12, copies
   the 12 bytes that end at the new %ecx (8 + 4, %eax as scratch),
   clears %esi and continues at L(CopyFrom1To16BytesCase3).  */
3392 | L(StrncpyLeave4): |
3393 | movaps %xmm2, %xmm3 |
3394 | add $48, %ebx |
3395 | jle L(StrncpyExit4) |
3396 | palignr $4, %xmm1, %xmm2 |
3397 | movaps %xmm2, (%edx) |
3398 | movaps 28(%ecx), %xmm2 |
3399 | lea 16(%esi), %esi |
3400 | sub $16, %ebx |
3401 | jbe L(StrncpyExit4) |
3402 | palignr $4, %xmm3, %xmm2 |
3403 | movaps %xmm2, 16(%edx) |
3404 | lea 16(%esi), %esi |
3405 | sub $16, %ebx |
3406 | jbe L(StrncpyExit4) |
3407 | movaps %xmm4, 32(%edx) |
3408 | lea 16(%esi), %esi |
3409 | sub $16, %ebx |
3410 | jbe L(StrncpyExit4) |
3411 | movaps %xmm5, 48(%edx) |
3412 | lea 16(%esi), %esi |
3413 | lea -16(%ebx), %ebx |
3414 | L(StrncpyExit4): |
3415 | lea 12(%edx, %esi), %edx |
3416 | lea 12(%ecx, %esi), %ecx |
3417 | movlpd -12(%ecx), %xmm0 |
3418 | movl -4(%ecx), %eax |
3419 | movlpd %xmm0, -12(%edx) |
3420 | movl %eax, -4(%edx) |
3421 | xor %esi, %esi |
3422 | jmp L(CopyFrom1To16BytesCase3) |
3423 | |
/* strncpy leave path for the palignr shift of 5.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $5 (the second after an
   aligned reload from 27(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit5) then advances %edx and %ecx by %esi + 11, copies
   the 11 bytes that end at the new %ecx (8 at -11 and 4 at -4,
   overlapping by one byte, %eax as scratch), clears %esi and continues
   at L(CopyFrom1To16BytesCase3).  */
3424 | L(StrncpyLeave5): |
3425 | movaps %xmm2, %xmm3 |
3426 | add $48, %ebx |
3427 | jle L(StrncpyExit5) |
3428 | palignr $5, %xmm1, %xmm2 |
3429 | movaps %xmm2, (%edx) |
3430 | movaps 27(%ecx), %xmm2 |
3431 | lea 16(%esi), %esi |
3432 | sub $16, %ebx |
3433 | jbe L(StrncpyExit5) |
3434 | palignr $5, %xmm3, %xmm2 |
3435 | movaps %xmm2, 16(%edx) |
3436 | lea 16(%esi), %esi |
3437 | sub $16, %ebx |
3438 | jbe L(StrncpyExit5) |
3439 | movaps %xmm4, 32(%edx) |
3440 | lea 16(%esi), %esi |
3441 | sub $16, %ebx |
3442 | jbe L(StrncpyExit5) |
3443 | movaps %xmm5, 48(%edx) |
3444 | lea 16(%esi), %esi |
3445 | lea -16(%ebx), %ebx |
3446 | L(StrncpyExit5): |
3447 | lea 11(%edx, %esi), %edx |
3448 | lea 11(%ecx, %esi), %ecx |
3449 | movlpd -11(%ecx), %xmm0 |
3450 | movl -4(%ecx), %eax |
3451 | movlpd %xmm0, -11(%edx) |
3452 | movl %eax, -4(%edx) |
3453 | xor %esi, %esi |
3454 | jmp L(CopyFrom1To16BytesCase3) |
3455 | |
/* strncpy leave path for the palignr shift of 6.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $6 (the second after an
   aligned reload from 26(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit6) then advances %edx and %ecx by %esi + 10, copies
   the 10 bytes that end at the new %ecx (8 + 2, %ax as scratch),
   clears %esi and continues at L(CopyFrom1To16BytesCase3).  */
3456 | L(StrncpyLeave6): |
3457 | movaps %xmm2, %xmm3 |
3458 | add $48, %ebx |
3459 | jle L(StrncpyExit6) |
3460 | palignr $6, %xmm1, %xmm2 |
3461 | movaps %xmm2, (%edx) |
3462 | movaps 26(%ecx), %xmm2 |
3463 | lea 16(%esi), %esi |
3464 | sub $16, %ebx |
3465 | jbe L(StrncpyExit6) |
3466 | palignr $6, %xmm3, %xmm2 |
3467 | movaps %xmm2, 16(%edx) |
3468 | lea 16(%esi), %esi |
3469 | sub $16, %ebx |
3470 | jbe L(StrncpyExit6) |
3471 | movaps %xmm4, 32(%edx) |
3472 | lea 16(%esi), %esi |
3473 | sub $16, %ebx |
3474 | jbe L(StrncpyExit6) |
3475 | movaps %xmm5, 48(%edx) |
3476 | lea 16(%esi), %esi |
3477 | lea -16(%ebx), %ebx |
3478 | L(StrncpyExit6): |
3479 | lea 10(%edx, %esi), %edx |
3480 | lea 10(%ecx, %esi), %ecx |
3481 | |
3482 | movlpd -10(%ecx), %xmm0 |
3483 | movw -2(%ecx), %ax |
3484 | movlpd %xmm0, -10(%edx) |
3485 | movw %ax, -2(%edx) |
3486 | xor %esi, %esi |
3487 | jmp L(CopyFrom1To16BytesCase3) |
3488 | |
/* strncpy leave path for the palignr shift of 7.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $7 (the second after an
   aligned reload from 25(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit7) then advances %edx and %ecx by %esi + 9, copies the
   9 bytes that end at the new %ecx (8 + 1, %ah as scratch), clears
   %esi and continues at L(CopyFrom1To16BytesCase3).  */
3489 | L(StrncpyLeave7): |
3490 | movaps %xmm2, %xmm3 |
3491 | add $48, %ebx |
3492 | jle L(StrncpyExit7) |
3493 | palignr $7, %xmm1, %xmm2 |
3494 | movaps %xmm2, (%edx) |
3495 | movaps 25(%ecx), %xmm2 |
3496 | lea 16(%esi), %esi |
3497 | sub $16, %ebx |
3498 | jbe L(StrncpyExit7) |
3499 | palignr $7, %xmm3, %xmm2 |
3500 | movaps %xmm2, 16(%edx) |
3501 | lea 16(%esi), %esi |
3502 | sub $16, %ebx |
3503 | jbe L(StrncpyExit7) |
3504 | movaps %xmm4, 32(%edx) |
3505 | lea 16(%esi), %esi |
3506 | sub $16, %ebx |
3507 | jbe L(StrncpyExit7) |
3508 | movaps %xmm5, 48(%edx) |
3509 | lea 16(%esi), %esi |
3510 | lea -16(%ebx), %ebx |
3511 | L(StrncpyExit7): |
3512 | lea 9(%edx, %esi), %edx |
3513 | lea 9(%ecx, %esi), %ecx |
3514 | |
3515 | movlpd -9(%ecx), %xmm0 |
3516 | movb -1(%ecx), %ah |
3517 | movlpd %xmm0, -9(%edx) |
3518 | movb %ah, -1(%edx) |
3519 | xor %esi, %esi |
3520 | jmp L(CopyFrom1To16BytesCase3) |
3521 | |
/* strncpy leave path for the palignr shift of 8.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $8 (the second after an
   aligned reload from 24(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit8) then advances %edx and %ecx by %esi + 8, copies the
   8 bytes that end at the new %ecx, clears %esi and continues at
   L(CopyFrom1To16BytesCase3).  */
3522 | L(StrncpyLeave8): |
3523 | movaps %xmm2, %xmm3 |
3524 | add $48, %ebx |
3525 | jle L(StrncpyExit8) |
3526 | palignr $8, %xmm1, %xmm2 |
3527 | movaps %xmm2, (%edx) |
3528 | movaps 24(%ecx), %xmm2 |
3529 | lea 16(%esi), %esi |
3530 | sub $16, %ebx |
3531 | jbe L(StrncpyExit8) |
3532 | palignr $8, %xmm3, %xmm2 |
3533 | movaps %xmm2, 16(%edx) |
3534 | lea 16(%esi), %esi |
3535 | sub $16, %ebx |
3536 | jbe L(StrncpyExit8) |
3537 | movaps %xmm4, 32(%edx) |
3538 | lea 16(%esi), %esi |
3539 | sub $16, %ebx |
3540 | jbe L(StrncpyExit8) |
3541 | movaps %xmm5, 48(%edx) |
3542 | lea 16(%esi), %esi |
3543 | lea -16(%ebx), %ebx |
3544 | L(StrncpyExit8): |
3545 | lea 8(%edx, %esi), %edx |
3546 | lea 8(%ecx, %esi), %ecx |
3547 | movlpd -8(%ecx), %xmm0 |
3548 | movlpd %xmm0, -8(%edx) |
3549 | xor %esi, %esi |
3550 | jmp L(CopyFrom1To16BytesCase3) |
3551 | |
/* strncpy leave path for the palignr shift of 9.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $9 (the second after an
   aligned reload from 23(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit9) then advances %edx and %ecx by %esi + 7, copies the
   8 bytes that end at the new %ecx (one more than the 7-byte step, so
   the first of them lies before the old position), clears %esi and
   continues at L(CopyFrom1To16BytesCase3).  */
3552 | L(StrncpyLeave9): |
3553 | movaps %xmm2, %xmm3 |
3554 | add $48, %ebx |
3555 | jle L(StrncpyExit9) |
3556 | palignr $9, %xmm1, %xmm2 |
3557 | movaps %xmm2, (%edx) |
3558 | movaps 23(%ecx), %xmm2 |
3559 | lea 16(%esi), %esi |
3560 | sub $16, %ebx |
3561 | jbe L(StrncpyExit9) |
3562 | palignr $9, %xmm3, %xmm2 |
3563 | movaps %xmm2, 16(%edx) |
3564 | lea 16(%esi), %esi |
3565 | sub $16, %ebx |
3566 | jbe L(StrncpyExit9) |
3567 | movaps %xmm4, 32(%edx) |
3568 | lea 16(%esi), %esi |
3569 | sub $16, %ebx |
3570 | jbe L(StrncpyExit9) |
3571 | movaps %xmm5, 48(%edx) |
3572 | lea 16(%esi), %esi |
3573 | lea -16(%ebx), %ebx |
3574 | L(StrncpyExit9): |
3575 | lea 7(%edx, %esi), %edx |
3576 | lea 7(%ecx, %esi), %ecx |
3577 | |
3578 | movlpd -8(%ecx), %xmm0 |
3579 | movlpd %xmm0, -8(%edx) |
3580 | xor %esi, %esi |
3581 | jmp L(CopyFrom1To16BytesCase3) |
3582 | |
/* strncpy leave path for the palignr shift of 10.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $10 (the second after an
   aligned reload from 22(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit10) then advances %edx and %ecx by %esi + 6, copies
   the 8 bytes that end at the new %ecx (two more than the 6-byte step,
   so the first two lie before the old position), clears %esi and
   continues at L(CopyFrom1To16BytesCase3).  */
3583 | L(StrncpyLeave10): |
3584 | movaps %xmm2, %xmm3 |
3585 | add $48, %ebx |
3586 | jle L(StrncpyExit10) |
3587 | palignr $10, %xmm1, %xmm2 |
3588 | movaps %xmm2, (%edx) |
3589 | movaps 22(%ecx), %xmm2 |
3590 | lea 16(%esi), %esi |
3591 | sub $16, %ebx |
3592 | jbe L(StrncpyExit10) |
3593 | palignr $10, %xmm3, %xmm2 |
3594 | movaps %xmm2, 16(%edx) |
3595 | lea 16(%esi), %esi |
3596 | sub $16, %ebx |
3597 | jbe L(StrncpyExit10) |
3598 | movaps %xmm4, 32(%edx) |
3599 | lea 16(%esi), %esi |
3600 | sub $16, %ebx |
3601 | jbe L(StrncpyExit10) |
3602 | movaps %xmm5, 48(%edx) |
3603 | lea 16(%esi), %esi |
3604 | lea -16(%ebx), %ebx |
3605 | L(StrncpyExit10): |
3606 | lea 6(%edx, %esi), %edx |
3607 | lea 6(%ecx, %esi), %ecx |
3608 | |
3609 | movlpd -8(%ecx), %xmm0 |
3610 | movlpd %xmm0, -8(%edx) |
3611 | xor %esi, %esi |
3612 | jmp L(CopyFrom1To16BytesCase3) |
3613 | |
/* strncpy leave path for the palignr shift of 11.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $11 (the second after an
   aligned reload from 21(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit11) then advances %edx and %ecx by %esi + 5, copies
   the 5 bytes that end at the new %ecx (4 via %esi and 1 via %ah, both
   used as scratch), clears %esi and continues at
   L(CopyFrom1To16BytesCase3).  */
3614 | L(StrncpyLeave11): |
3615 | movaps %xmm2, %xmm3 |
3616 | add $48, %ebx |
3617 | jle L(StrncpyExit11) |
3618 | palignr $11, %xmm1, %xmm2 |
3619 | movaps %xmm2, (%edx) |
3620 | movaps 21(%ecx), %xmm2 |
3621 | lea 16(%esi), %esi |
3622 | sub $16, %ebx |
3623 | jbe L(StrncpyExit11) |
3624 | palignr $11, %xmm3, %xmm2 |
3625 | movaps %xmm2, 16(%edx) |
3626 | lea 16(%esi), %esi |
3627 | sub $16, %ebx |
3628 | jbe L(StrncpyExit11) |
3629 | movaps %xmm4, 32(%edx) |
3630 | lea 16(%esi), %esi |
3631 | sub $16, %ebx |
3632 | jbe L(StrncpyExit11) |
3633 | movaps %xmm5, 48(%edx) |
3634 | lea 16(%esi), %esi |
3635 | lea -16(%ebx), %ebx |
3636 | L(StrncpyExit11): |
3637 | lea 5(%edx, %esi), %edx |
3638 | lea 5(%ecx, %esi), %ecx |
3639 | movl -5(%ecx), %esi |
3640 | movb -1(%ecx), %ah |
3641 | movl %esi, -5(%edx) |
3642 | movb %ah, -1(%edx) |
3643 | xor %esi, %esi |
3644 | jmp L(CopyFrom1To16BytesCase3) |
3645 | |
/* strncpy leave path for the palignr shift of 12.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $12 (the second after an
   aligned reload from 20(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit12) then advances %edx and %ecx by %esi + 4, copies
   the 4 bytes that end at the new %ecx (%eax as scratch), clears %esi
   and continues at L(CopyFrom1To16BytesCase3).  */
3646 | L(StrncpyLeave12): |
3647 | movaps %xmm2, %xmm3 |
3648 | add $48, %ebx |
3649 | jle L(StrncpyExit12) |
3650 | palignr $12, %xmm1, %xmm2 |
3651 | movaps %xmm2, (%edx) |
3652 | movaps 20(%ecx), %xmm2 |
3653 | lea 16(%esi), %esi |
3654 | sub $16, %ebx |
3655 | jbe L(StrncpyExit12) |
3656 | palignr $12, %xmm3, %xmm2 |
3657 | movaps %xmm2, 16(%edx) |
3658 | lea 16(%esi), %esi |
3659 | sub $16, %ebx |
3660 | jbe L(StrncpyExit12) |
3661 | movaps %xmm4, 32(%edx) |
3662 | lea 16(%esi), %esi |
3663 | sub $16, %ebx |
3664 | jbe L(StrncpyExit12) |
3665 | movaps %xmm5, 48(%edx) |
3666 | lea 16(%esi), %esi |
3667 | lea -16(%ebx), %ebx |
3668 | L(StrncpyExit12): |
3669 | lea 4(%edx, %esi), %edx |
3670 | lea 4(%ecx, %esi), %ecx |
3671 | movl -4(%ecx), %eax |
3672 | movl %eax, -4(%edx) |
3673 | xor %esi, %esi |
3674 | jmp L(CopyFrom1To16BytesCase3) |
3675 | |
/* strncpy leave path for the palignr shift of 13.  %xmm3 keeps a copy
   of %xmm2 and 48 is added back to %ebx.  While count remains, up to
   four 16-byte blocks are stored at 0, 16, 32 and 48 bytes from %edx:
   the first two are assembled with palignr $13 (the second after an
   aligned reload from 19(%ecx)), the last two are %xmm4 and %xmm5 as
   they stand.  %esi advances by 16 and %ebx drops by 16 per block.
   L(StrncpyExit13) then advances %edx and %ecx by %esi + 3, copies
   the 4 bytes that end at the new %ecx (one more than the 3-byte step;
   %eax as scratch), clears %esi and continues at
   L(CopyFrom1To16BytesCase3).  */
3676 | L(StrncpyLeave13): |
3677 | movaps %xmm2, %xmm3 |
3678 | add $48, %ebx |
3679 | jle L(StrncpyExit13) |
3680 | palignr $13, %xmm1, %xmm2 |
3681 | movaps %xmm2, (%edx) |
3682 | movaps 19(%ecx), %xmm2 |
3683 | lea 16(%esi), %esi |
3684 | sub $16, %ebx |
3685 | jbe L(StrncpyExit13) |
3686 | palignr $13, %xmm3, %xmm2 |
3687 | movaps %xmm2, 16(%edx) |
3688 | lea 16(%esi), %esi |
3689 | sub $16, %ebx |
3690 | jbe L(StrncpyExit13) |
3691 | movaps %xmm4, 32(%edx) |
3692 | lea 16(%esi), %esi |
3693 | sub $16, %ebx |
3694 | jbe L(StrncpyExit13) |
3695 | movaps %xmm5, 48(%edx) |
3696 | lea 16(%esi), %esi |
3697 | lea -16(%ebx), %ebx |
3698 | L(StrncpyExit13): |
3699 | lea 3(%edx, %esi), %edx |
3700 | lea 3(%ecx, %esi), %ecx |
3701 | |
3702 | movl -4(%ecx), %eax |
3703 | movl %eax, -4(%edx) |
3704 | xor %esi, %esi |
3705 | jmp L(CopyFrom1To16BytesCase3) |
3706 | |
/* StrncpyLeave14 and StrncpyExit14: leave path for the variant whose source
   data is shifted by 14 bytes with palignr.  StrncpyLeave14 falls through
   into StrncpyExit14.
   Register use visible in this block:
     ecx   source address, all loads are relative to it
     edx   destination address, all stores are relative to it
     esi   byte offset, raised by 16 after every 16-byte store and added to
           both ecx and edx at StrncpyExit14
     ebx   counter that gates every further store (presumably the remaining
           strncpy length, biased by the code that jumps here - TODO confirm)
     xmm1, xmm2, xmm4, xmm5   filled in before this label, outside this chunk
   Note that 18 = 16 + 2 and 2 = 16 - 14.  */
3707 | L(StrncpyLeave14): |
3708 | movaps %xmm2, %xmm3 /* keep the unshifted xmm2, it feeds the second palignr */ |
3709 | add $48, %ebx |
3710 | jle L(StrncpyExit14) /* signed test: leave if ebx + 48 is zero or negative */ |
3711 | palignr $14, %xmm1, %xmm2 /* xmm2 = bytes 14..29 of the 32-byte pair xmm2:xmm1 (xmm1 low) */ |
3712 | movaps %xmm2, (%edx) /* aligned 16-byte store */ |
3713 | movaps 18(%ecx), %xmm2 /* aligned load, so ecx + 18 must be a multiple of 16 */ |
3714 | lea 16(%esi), %esi /* esi += 16, flags untouched */ |
3715 | sub $16, %ebx |
3716 | jbe L(StrncpyExit14) /* unsigned test: leave if ebx was 16 or less */ |
3717 | palignr $14, %xmm3, %xmm2 /* xmm2 = bytes 14..29 of xmm2:xmm3 (xmm3 = saved copy, low) */ |
3718 | movaps %xmm2, 16(%edx) |
3719 | lea 16(%esi), %esi |
3720 | sub $16, %ebx |
3721 | jbe L(StrncpyExit14) |
3722 | movaps %xmm4, 32(%edx) /* xmm4 is prepared outside this chunk */ |
3723 | lea 16(%esi), %esi |
3724 | sub $16, %ebx |
3725 | jbe L(StrncpyExit14) |
3726 | movaps %xmm5, 48(%edx) /* xmm5 is prepared outside this chunk */ |
3727 | lea 16(%esi), %esi |
3728 | lea -16(%ebx), %ebx /* ebx -= 16, flags untouched */ |
3729 | L(StrncpyExit14): |
3730 | lea 2(%edx, %esi), %edx /* edx = edx + esi + 2 */ |
3731 | lea 2(%ecx, %esi), %ecx /* ecx = ecx + esi + 2 */ |
3732 | movw -2(%ecx), %ax /* copy the 2 bytes that end at the new ecx, ax is scratch */ |
3733 | movw %ax, -2(%edx) |
3734 | xor %esi, %esi /* clear esi before the jump */ |
3735 | jmp L(CopyFrom1To16BytesCase3) /* target is defined outside this chunk */ |
3736 | |
/* StrncpyLeave15 and StrncpyExit15: leave path for the variant whose source
   data is shifted by 15 bytes with palignr.  StrncpyLeave15 falls through
   into StrncpyExit15.
   Register use visible in this block:
     ecx   source address, all loads are relative to it
     edx   destination address, all stores are relative to it
     esi   byte offset, raised by 16 after every 16-byte store and added to
           both ecx and edx at StrncpyExit15
     ebx   counter that gates every further store (presumably the remaining
           strncpy length, biased by the code that jumps here - TODO confirm)
     xmm1, xmm2, xmm4, xmm5   filled in before this label, outside this chunk
   Note that 17 = 16 + 1 and 1 = 16 - 15.  */
3737 | L(StrncpyLeave15): |
3738 | movaps %xmm2, %xmm3 /* keep the unshifted xmm2, it feeds the second palignr */ |
3739 | add $48, %ebx |
3740 | jle L(StrncpyExit15) /* signed test: leave if ebx + 48 is zero or negative */ |
3741 | palignr $15, %xmm1, %xmm2 /* xmm2 = bytes 15..30 of the 32-byte pair xmm2:xmm1 (xmm1 low) */ |
3742 | movaps %xmm2, (%edx) /* aligned 16-byte store */ |
3743 | movaps 17(%ecx), %xmm2 /* aligned load, so ecx + 17 must be a multiple of 16 */ |
3744 | lea 16(%esi), %esi /* esi += 16, flags untouched */ |
3745 | sub $16, %ebx |
3746 | jbe L(StrncpyExit15) /* unsigned test: leave if ebx was 16 or less */ |
3747 | palignr $15, %xmm3, %xmm2 /* xmm2 = bytes 15..30 of xmm2:xmm3 (xmm3 = saved copy, low) */ |
3748 | movaps %xmm2, 16(%edx) |
3749 | lea 16(%esi), %esi |
3750 | sub $16, %ebx |
3751 | jbe L(StrncpyExit15) |
3752 | movaps %xmm4, 32(%edx) /* xmm4 is prepared outside this chunk */ |
3753 | lea 16(%esi), %esi |
3754 | sub $16, %ebx |
3755 | jbe L(StrncpyExit15) |
3756 | movaps %xmm5, 48(%edx) /* xmm5 is prepared outside this chunk */ |
3757 | lea 16(%esi), %esi |
3758 | lea -16(%ebx), %ebx /* ebx -= 16, flags untouched */ |
3759 | L(StrncpyExit15): |
3760 | lea 1(%edx, %esi), %edx /* edx = edx + esi + 1 */ |
3761 | lea 1(%ecx, %esi), %ecx /* ecx = ecx + esi + 1 */ |
3762 | movb -1(%ecx), %ah /* copy the single byte before the new ecx, this overwrites bits 8..15 of eax */ |
3763 | movb %ah, -1(%edx) |
3764 | xor %esi, %esi /* clear esi before the jump */ |
3765 | jmp L(CopyFrom1To16BytesCase3) /* target is defined outside this chunk */ |
3766 | # endif |
3767 | |
3768 | # ifndef USE_AS_STRCAT |
3769 | # ifdef USE_AS_STRNCPY |
/* Unwind bookkeeping followed by the zero-length exit ExitTail0.
   CFI_POP, as defined near the top of the file, expands to
   cfi_adjust_cfa_offset (-4) and cfi_restore (REG).  Unlike POP it contains
   no popl, so the two lines below presumably emit no machine instruction and
   only tell the unwinder that esi and edi are not on the stack in the code
   that follows.  */
3770 | CFI_POP (%esi) |
3771 | CFI_POP (%edi) |
3772 | |
3773 | .p2align 4 /* start the label on a 16-byte boundary */ |
3774 | L(ExitTail0): |
3775 | movl %edx, %eax /* return value = edx, nothing is copied here */ |
3776 | RETURN /* under USE_AS_STRNCPY this is POP (%ebx) followed by ret */ |
3777 | |
/* StrncpyExit15Bytes: short-copy exit, ecx = source, edx = destination,
   ebx = byte limit.  The code that jumps here is outside this chunk, so the
   entry range of ebx is presumably 9..15 - TODO confirm.
   Source bytes 0..7 are not tested for NUL here (presumably already checked
   by the caller - verify).
   A limit of 12 or less is handed to StrncpyExit12Bytes.  Otherwise source
   bytes 8..13 are scanned, and the matching ExitTailN (defined outside this
   chunk) is taken at the first NUL or as soon as the limit is reached.  If
   neither happens, 15 bytes are copied here.  */
3778 | .p2align 4 |
3779 | L(StrncpyExit15Bytes): |
3780 | cmp $12, %ebx |
3781 | jbe L(StrncpyExit12Bytes) /* unsigned: ebx is 12 or less */ |
3782 | cmpb $0, 8(%ecx) /* from here ebx is at least 13, so bytes 8..11 need no limit test */ |
3783 | jz L(ExitTail9) |
3784 | cmpb $0, 9(%ecx) |
3785 | jz L(ExitTail10) |
3786 | cmpb $0, 10(%ecx) |
3787 | jz L(ExitTail11) |
3788 | cmpb $0, 11(%ecx) |
3789 | jz L(ExitTail12) |
3790 | cmp $13, %ebx /* limit reached before byte 12 is examined */ |
3791 | je L(ExitTail13) |
3792 | cmpb $0, 12(%ecx) |
3793 | jz L(ExitTail13) |
3794 | cmp $14, %ebx |
3795 | je L(ExitTail14) |
3796 | cmpb $0, 13(%ecx) |
3797 | jz L(ExitTail14) |
3798 | movlpd (%ecx), %xmm0 /* source bytes 0..7 */ |
3799 | movlpd 7(%ecx), %xmm1 /* source bytes 7..14, overlapping the first move at byte 7 */ |
3800 | movlpd %xmm0, (%edx) |
3801 | movlpd %xmm1, 7(%edx) /* 15 bytes stored in total */ |
3802 | # ifdef USE_AS_STPCPY |
3803 | lea 14(%edx), %eax /* address of the last byte stored */ |
3804 | cmpb $1, (%eax) /* carry is set only when that byte is 0 */ |
3805 | sbb $-1, %eax /* eax = eax + 1 - carry: stays on a NUL, else moves one past it */ |
3806 | # else |
3807 | movl %edx, %eax /* return the destination start */ |
3808 | # endif |
3809 | RETURN /* under USE_AS_STRNCPY this is POP (%ebx) followed by ret */ |
3810 | |
/* StrncpyExit12Bytes: short-copy exit, ecx = source, edx = destination,
   ebx = byte limit.  StrncpyExit15Bytes jumps here when ebx is 12 or less.
   No test for ebx below 9 is made here, so the entry range is presumably
   9..12 - TODO confirm against the callers outside this chunk.
   Source bytes 0..7 are not tested for NUL here (presumably already checked
   by the caller - verify).
   Bytes 8..10 are scanned, and the matching ExitTailN (defined outside this
   chunk) is taken as soon as the limit is reached or a NUL is found.
   Otherwise 12 bytes are copied here, and byte 11 is copied without being
   tested first.  */
3811 | .p2align 4 |
3812 | L(StrncpyExit12Bytes): |
3813 | cmp $9, %ebx /* limit reached before byte 8 is examined */ |
3814 | je L(ExitTail9) |
3815 | cmpb $0, 8(%ecx) |
3816 | jz L(ExitTail9) |
3817 | cmp $10, %ebx |
3818 | je L(ExitTail10) |
3819 | cmpb $0, 9(%ecx) |
3820 | jz L(ExitTail10) |
3821 | cmp $11, %ebx |
3822 | je L(ExitTail11) |
3823 | cmpb $0, 10(%ecx) |
3824 | jz L(ExitTail11) |
3825 | movlpd (%ecx), %xmm0 /* source bytes 0..7 */ |
3826 | movl 8(%ecx), %eax /* source bytes 8..11, eax is scratch until SAVE_RESULT_TAIL */ |
3827 | movlpd %xmm0, (%edx) |
3828 | movl %eax, 8(%edx) /* 12 bytes stored in total */ |
3829 | SAVE_RESULT_TAIL (11) /* per the macro: eax = edx + 11 under USE_AS_STPCPY, else eax = edx */ |
3830 | # ifdef USE_AS_STPCPY |
3831 | cmpb $1, (%eax) /* carry is set only when the last stored byte is 0 */ |
3832 | sbb $-1, %eax /* eax = eax + 1 - carry: stays on a NUL, else moves one past it */ |
3833 | # endif |
3834 | RETURN /* under USE_AS_STRNCPY this is POP (%ebx) followed by ret */ |
3835 | |
/* StrncpyExit8Bytes: short-copy exit, ecx = source, edx = destination,
   ebx = byte limit (presumably 8 or less on entry - the code that jumps
   here is outside this chunk, TODO confirm).
   A limit of 4 or less is handed to StrncpyExit4Bytes.  Otherwise source
   bytes 0..6 are scanned, and the matching ExitTailN (defined outside this
   chunk) is taken at the first NUL or as soon as the limit is reached.  If
   neither happens, 8 bytes are copied here, and byte 7 is copied without
   being tested first.  */
3836 | .p2align 4 |
3837 | L(StrncpyExit8Bytes): |
3838 | cmp $4, %ebx |
3839 | jbe L(StrncpyExit4Bytes) /* unsigned: ebx is 4 or less */ |
3840 | cmpb $0, (%ecx) /* from here ebx is at least 5, so bytes 0..3 need no limit test */ |
3841 | jz L(ExitTail1) |
3842 | cmpb $0, 1(%ecx) |
3843 | jz L(ExitTail2) |
3844 | cmpb $0, 2(%ecx) |
3845 | jz L(ExitTail3) |
3846 | cmpb $0, 3(%ecx) |
3847 | jz L(ExitTail4) |
3848 | |
3849 | cmp $5, %ebx /* limit reached before byte 4 is examined */ |
3850 | je L(ExitTail5) |
3851 | cmpb $0, 4(%ecx) |
3852 | jz L(ExitTail5) |
3853 | cmp $6, %ebx |
3854 | je L(ExitTail6) |
3855 | cmpb $0, 5(%ecx) |
3856 | jz L(ExitTail6) |
3857 | cmp $7, %ebx |
3858 | je L(ExitTail7) |
3859 | cmpb $0, 6(%ecx) |
3860 | jz L(ExitTail7) |
3861 | movlpd (%ecx), %xmm0 /* source bytes 0..7 */ |
3862 | movlpd %xmm0, (%edx) /* 8 bytes stored */ |
3863 | # ifdef USE_AS_STPCPY |
3864 | lea 7(%edx), %eax /* address of the last byte stored */ |
3865 | cmpb $1, (%eax) /* carry is set only when that byte is 0 */ |
3866 | sbb $-1, %eax /* eax = eax + 1 - carry: stays on a NUL, else moves one past it */ |
3867 | # else |
3868 | movl %edx, %eax /* return the destination start */ |
3869 | # endif |
3870 | RETURN /* under USE_AS_STRNCPY this is POP (%ebx) followed by ret */ |
3871 | |
/* StrncpyExit4Bytes: short-copy exit, ecx = source, edx = destination,
   ebx = byte limit.  StrncpyExit8Bytes jumps here when ebx is 4 or less.
   A limit of 0 goes to ExitTail0.  Otherwise source bytes 0..2 are scanned,
   and the matching ExitTailN (defined outside this chunk) is taken as soon
   as the limit is reached or a NUL is found.  If neither happens, 4 bytes
   are copied here, and byte 3 is copied without being tested first.  */
3872 | .p2align 4 |
3873 | L(StrncpyExit4Bytes): |
3874 | test %ebx, %ebx |
3875 | jz L(ExitTail0) /* limit is zero, nothing to copy */ |
3876 | cmp $1, %ebx /* limit reached before byte 0 is examined */ |
3877 | je L(ExitTail1) |
3878 | cmpb $0, (%ecx) |
3879 | jz L(ExitTail1) |
3880 | cmp $2, %ebx |
3881 | je L(ExitTail2) |
3882 | cmpb $0, 1(%ecx) |
3883 | jz L(ExitTail2) |
3884 | cmp $3, %ebx |
3885 | je L(ExitTail3) |
3886 | cmpb $0, 2(%ecx) |
3887 | jz L(ExitTail3) |
3888 | movl (%ecx), %eax /* source bytes 0..3, eax is scratch until SAVE_RESULT_TAIL */ |
3889 | movl %eax, (%edx) /* 4 bytes stored */ |
3890 | SAVE_RESULT_TAIL (3) /* per the macro: eax = edx + 3 under USE_AS_STPCPY, else eax = edx */ |
3891 | # ifdef USE_AS_STPCPY |
3892 | cmpb $1, (%eax) /* carry is set only when the last stored byte is 0 */ |
3893 | sbb $-1, %eax /* eax = eax + 1 - carry: stays on a NUL, else moves one past it */ |
3894 | # endif |
3895 | RETURN /* under USE_AS_STRNCPY this is POP (%ebx) followed by ret */ |
3896 | # endif |
3897 | |
3898 | END (STRCPY) |
3899 | # endif |
3900 | #endif |
3901 | |