1 | /* strcpy with SSE2 and unaligned load |
2 | Copyright (C) 2011-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | |
20 | #if IS_IN (libc) |
21 | |
22 | # include <sysdep.h> |
23 | |
24 | |
/* Unwind (CFI) bookkeeping for 4-byte pushes/pops: keep the CFA offset
   and the saved-register records in sync so backtraces stay correct at
   every instruction.  */
# define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

/* Push/pop a register together with its matching CFI annotation.  */
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
35 | |
/* Entry-point name; wrapper files override this before including this
   source (e.g. to build strncpy/stpncpy variants).  */
# ifndef STRCPY
# define STRCPY __strcpy_sse2
# endif

/* Stack offsets of the incoming arguments relative to %esp after
   ENTRANCE has pushed the callee-saved registers below:
   dest, src, and (strncpy case) the length bound.  */
# define STR1 PARMS
# define STR2 STR1+4
# define LEN STR2+4

# ifdef USE_AS_STRNCPY
/* Three 4-byte register pushes (ENTRANCE) + the return address.  */
# define PARMS 16
# define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
/* Restore callee-saved registers and return.  The trailing CFI_PUSHes
   re-assert the saved-register state because more code paths of the
   same function (sharing this unwind region) follow the ret.  */
# define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \
	CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
49 | |
# ifdef PIC
/* PIC build: jump-table entries are offsets relative to the table base,
   so the tables need no dynamic relocations.  */
# define JMPTBL(I, B) I - B

/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
   jump table with relative offsets.
   INDEX is a register that contains the index into the jump table.
   SCALE is the scale of INDEX.  */

# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
	/* We first load PC into ECX.  */ \
	SETUP_PIC_REG(cx); \
	/* Get the address of the jump table.  */ \
	addl $(TABLE - .), %ecx; \
	/* Get the entry and convert the relative offset to the \
	absolute address.  */ \
	addl (%ecx,INDEX,SCALE), %ecx; \
	/* We loaded the jump table and adjusted ECX.  Go.  */ \
	jmp *%ecx
# else
/* Non-PIC build: entries are absolute addresses; branch directly.  */
# define JMPTBL(I, B) I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  INDEX is a register that contains the index into
   the jump table.  SCALE is the scale of INDEX.  */

# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
	jmp *TABLE(,INDEX,SCALE)
# endif
78 | |
	.text
/* STRCPY — bounded copy (strncpy/stpncpy paths of this source).
   In:   STR1(%esp) = dest, STR2(%esp) = src, LEN(%esp) = max count.
   Regs: %edi = dest, %esi = src, %ebx = remaining byte count,
	 %ecx = src misalignment / running chunk offset,
	 %xmm0 = all-zero NUL detector (see fall-through notes below),
	 %eax = return value (dest, or the past-NUL pointer for stpcpy).
   The exit stubs, jump tables and END marker referenced from here are
   defined later in this file.  */
ENTRY (STRCPY)
	ENTRANCE
	mov	STR1(%esp), %edi
	mov	STR2(%esp), %esi
	movl	LEN(%esp), %ebx
	test	%ebx, %ebx
	jz	L(ExitZero)		/* Zero count: nothing to copy.  */

	mov	%esi, %ecx
# ifndef USE_AS_STPCPY
	mov	%edi, %eax		/* save result */
# endif
	and	$15, %ecx		/* %ecx = src offset within 16 bytes.  */
	jz	L(SourceStringAlignmentZero)

	/* Unaligned src: round it down to a 16-byte boundary; the first
	   aligned block then holds %ecx bytes preceding the string.  */
	and	$-16, %esi
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	pcmpeqb	(%esi), %xmm1		/* NUL mask for the aligned block.  */
	add	%ecx, %ebx		/* bias count by the pre-string bytes.  */
	pmovmskb %xmm1, %edx
	shr	%cl, %edx		/* drop NUL hits before the string.  */
# ifdef USE_AS_STPCPY
	cmp	$16, %ebx
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
# else
	cmp	$17, %ebx
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail)	/* NUL in first 16 bytes.  */

	pcmpeqb	16(%esi), %xmm0
	pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
	cmp	$32, %ebx
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
# else
	cmp	$33, %ebx
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
# endif
	test	%edx, %edx		/* %edx == 0 on fall-through also
					   means the pcmpeqb left %xmm0
					   all-zero again.  */
	jnz	L(CopyFrom1To32Bytes)

	movdqu	(%esi, %ecx), %xmm1	/* copy 16 bytes */
	movdqu	%xmm1, (%edi)

	/* Bias dest so (%edi, n) and (%esi, n) walk in lockstep even
	   though src was rounded down by %ecx bytes.  */
	sub	%ecx, %edi

	/* If source address alignment != destination address alignment */
	.p2align 4
L(Unalign16Both):
	/* 16-bytes-per-step copy, unrolled: aligned loads from src,
	   unaligned stores to dest.  Each step scans the NEXT 16-byte
	   src block for NUL before that block is stored; when no NUL is
	   found the pcmpeqb result leaves %xmm0 all-zero for reuse.
	   NOTE(review): the first step debits 48 from %ebx vs 16 for the
	   later steps — presumably accounting for the 32 bytes already
	   handled above; confirm against the full file.  */
	mov	$16, %ecx
	movdqa	(%esi, %ecx), %xmm1
	movaps	16(%esi, %ecx), %xmm2
	movdqu	%xmm1, (%edi, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$48, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)

	movaps	16(%esi, %ecx), %xmm3
	movdqu	%xmm2, (%edi, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)

	movaps	16(%esi, %ecx), %xmm4
	movdqu	%xmm3, (%edi, %ecx)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)

	movaps	16(%esi, %ecx), %xmm1
	movdqu	%xmm4, (%edi, %ecx)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm1)

	movaps	16(%esi, %ecx), %xmm2
	movdqu	%xmm1, (%edi, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)

	movaps	16(%esi, %ecx), %xmm3
	movdqu	%xmm2, (%edi, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)

	/* All probed blocks were NUL-free: switch to the 64-byte loop.
	   Align src down to 64 bytes and apply the same delta to dest
	   and the remaining count so the three stay consistent.  */
	movdqu	%xmm3, (%edi, %ecx)
	mov	%esi, %edx
	lea	16(%esi, %ecx), %esi
	and	$-0x40, %esi
	sub	%esi, %edx		/* %edx = old %esi - aligned %esi.  */
	sub	%edx, %edi
	lea	128(%ebx, %edx), %ebx

L(Unaligned64Loop):
	/* Probe 64 bytes at once: pminub folds the four 16-byte blocks
	   into one; a NUL anywhere puts a zero byte in the minimum.  */
	movaps	(%esi), %xmm2
	movaps	%xmm2, %xmm4
	movaps	16(%esi), %xmm5
	movaps	32(%esi), %xmm3
	movaps	%xmm3, %xmm6
	movaps	48(%esi), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$64, %ebx
	jbe	L(UnalignedLeaveCase2OrCase3)
	test	%edx, %edx
	jnz	L(Unaligned64Leave)

L(Unaligned64Loop_start):
	/* Store the previous 64 bytes (still in %xmm4..%xmm7) while
	   probing the next 64 with the same pminub reduction.  */
	add	$64, %edi
	add	$64, %esi
	movdqu	%xmm4, -64(%edi)
	movaps	(%esi), %xmm2
	movdqa	%xmm2, %xmm4
	movdqu	%xmm5, -48(%edi)
	movaps	16(%esi), %xmm5
	pminub	%xmm5, %xmm2
	movaps	32(%esi), %xmm3
	movdqu	%xmm6, -32(%edi)
	movaps	%xmm3, %xmm6
	movdqu	%xmm7, -16(%edi)
	movaps	48(%esi), %xmm7
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$64, %ebx
	jbe	L(UnalignedLeaveCase2OrCase3)
	test	%edx, %edx
	jz	L(Unaligned64Loop_start)

L(Unaligned64Leave):
	/* A NUL is somewhere in %xmm4..%xmm7; re-test each 16-byte block
	   individually to find which one.  */
	pxor	%xmm1, %xmm1

	pcmpeqb	%xmm4, %xmm0
	pcmpeqb	%xmm5, %xmm1
	pmovmskb %xmm0, %edx
	pmovmskb %xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnaligned_0)
	test	%ecx, %ecx
	jnz	L(CopyFrom1To16BytesUnaligned_16)

	pcmpeqb	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pmovmskb %xmm0, %edx
	pmovmskb %xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnaligned_32)

	/* NUL is in the last block (%xmm7) at bit index bsf(%ecx):
	   store all 64 bytes, step dest one past the NUL, and zero-fill
	   whatever the count still owes.  */
	bsf	%ecx, %edx
	movdqu	%xmm4, (%edi)
	movdqu	%xmm5, 16(%edi)
	movdqu	%xmm6, 32(%edi)
# ifdef USE_AS_STPCPY
	lea	48(%edi, %edx), %eax
# endif
	movdqu	%xmm7, 48(%edi)
	add	$15, %ebx
	sub	%edx, %ebx		/* bytes still owed as zero fill.  */
	lea	49(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)

	/* If source address alignment == destination address alignment */

L(SourceStringAlignmentZero):
	/* 16-byte-aligned src: probe the first 32 bytes directly, store
	   the first 16, then join the shared loop above (the %ecx bias
	   is zero on this path).  */
	pxor	%xmm0, %xmm0
	movdqa	(%esi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
	cmp	$16, %ebx
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
# else
	cmp	$17, %ebx
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail1)

	pcmpeqb	16(%esi), %xmm0
	movdqu	%xmm1, (%edi)
	pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
	cmp	$32, %ebx
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
# else
	cmp	$33, %ebx
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
# endif
	test	%edx, %edx		/* fall-through leaves %xmm0 zero.  */
	jnz	L(CopyFrom1To32Bytes1)

	jmp	L(Unalign16Both)
304 | |
/*-----------------End of main part---------------------------*/

/* Exit-path taxonomy, as established by the branch structure above:
   Case1 — a NUL was found with count to spare: dispatch on the NUL
	   offset through L(ExitTable);
   Case2 — the count ran out inside a chunk that also contains a NUL:
	   whichever comes first decides the exit;
   Case3 — the count ran out with no NUL seen: dispatch on the residual
	   count through L(ExitStrncpyTable).
   Both jump tables, the Exit*/StrncpyExit* stubs they reach, and
   L(StrncpyFillTailWithZero) are defined later in this file.  */

 /* Case1 */
	.p2align 4
L(CopyFrom1To16BytesTail):
	sub	%ecx, %ebx		/* undo the pre-string count bias.  */
	add	%ecx, %esi		/* %esi back to the true string start.  */
	bsf	%edx, %edx		/* %edx = offset of the NUL byte.  */
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1):
	/* Aligned path, NUL in bytes 16..31: step past the stored block
	   first.  */
	add	$16, %esi
	add	$16, %edi
	sub	$16, %ebx
L(CopyFrom1To16BytesTail1):
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	.p2align 4
L(CopyFrom1To32Bytes):
	/* Unaligned path, NUL in the second 16-byte block: convert the
	   in-block offset to a string-relative offset.  */
	sub	%ecx, %ebx
	bsf	%edx, %edx
	add	%ecx, %esi
	add	$16, %edx
	sub	%ecx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
	/* NUL in the first block of a 64-byte group: store it, advance
	   dest one past the NUL, and zero-fill the remaining count.  */
	bsf	%edx, %edx
# ifdef USE_AS_STPCPY
	lea	(%edi, %edx), %eax
# endif
	movdqu	%xmm4, (%edi)
	add	$63, %ebx
	sub	%edx, %ebx
	lea	1(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)

	.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
	/* NUL in the second block: store two blocks, then zero-fill.  */
	bsf	%ecx, %edx
	movdqu	%xmm4, (%edi)
# ifdef USE_AS_STPCPY
	lea	16(%edi, %edx), %eax
# endif
	movdqu	%xmm5, 16(%edi)
	add	$47, %ebx
	sub	%edx, %ebx
	lea	17(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)

	.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
	/* NUL in the third block: store three blocks, then zero-fill.  */
	bsf	%edx, %edx
	movdqu	%xmm4, (%edi)
	movdqu	%xmm5, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	32(%edi, %edx), %eax
# endif
	movdqu	%xmm6, 32(%edi)
	add	$31, %ebx
	sub	%edx, %ebx
	lea	33(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)

	/* Flush the pending 16-byte register when the unrolled loop found
	   a NUL in the block just scanned, then finish through
	   L(CopyFrom1To16BytesXmmExit) (defined later in this file).
	   NOTE(review): the %xmm6 and %xmm5 variants are not referenced
	   in the portion of the file visible here — presumably reached
	   from code further down; confirm against the full file.  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm6):
	movdqu	%xmm6, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm5):
	movdqu	%xmm5, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm4):
	movdqu	%xmm4, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm3):
	movdqu	%xmm3, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm1):
	movdqu	%xmm1, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesExit):
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	/* Case2 */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	/* Count exhausted in a chunk that also contains a NUL: if the
	   NUL (offset %edx) comes before the residual count (%ebx), exit
	   via L(ExitTable); otherwise copy exactly %ebx bytes via
	   L(ExitStrncpyTable).  The same compare closes each Case2 stub
	   below.  */
	add	$16, %ebx
	add	%ecx, %edi
	add	%ecx, %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	sub	%ecx, %ebx
	add	%ecx, %esi
	bsf	%edx, %edx
	add	$16, %edx		/* NUL was in the second block.  */
	sub	%ecx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

L(CopyFrom1To16BytesTailCase2):
	sub	%ecx, %ebx
	add	%ecx, %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

L(CopyFrom1To16BytesTail1Case2):
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

	/* Case2 or Case3, Case3 */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	/* Count exhausted; %edx != 0 means a NUL was also seen in the
	   same chunk (Case2), otherwise no NUL (Case3).  Likewise for
	   the variants below.  */
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesCase2)
L(CopyFrom1To16BytesCase3):
	add	$16, %ebx
	add	%ecx, %edi
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To32BytesCase2)
	sub	%ecx, %ebx
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTailCase2)
	sub	%ecx, %ebx
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %edi
	add	$16, %esi
	sub	$16, %ebx
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
475 | |
476 | .p2align 4 |
477 | L(Exit0): |
478 | # ifdef USE_AS_STPCPY |
479 | mov %edi, %eax |
480 | # endif |
481 | RETURN |
482 | |
483 | .p2align 4 |
484 | L(Exit1): |
485 | movb %dh, (%edi) |
486 | # ifdef USE_AS_STPCPY |
487 | lea (%edi), %eax |
488 | # endif |
489 | sub $1, %ebx |
490 | lea 1(%edi), %edi |
491 | jnz L(StrncpyFillTailWithZero) |
492 | RETURN |
493 | |
494 | .p2align 4 |
495 | L(Exit2): |
496 | movw (%esi), %dx |
497 | movw %dx, (%edi) |
498 | # ifdef USE_AS_STPCPY |
499 | lea 1(%edi), %eax |
500 | # endif |
501 | sub $2, %ebx |
502 | lea 2(%edi), %edi |
503 | jnz L(StrncpyFillTailWithZero) |
504 | RETURN |
505 | |
506 | .p2align 4 |
507 | L(Exit3): |
508 | movw (%esi), %cx |
509 | movw %cx, (%edi) |
510 | movb %dh, 2(%edi) |
511 | # ifdef USE_AS_STPCPY |
512 | lea 2(%edi), %eax |
513 | # endif |
514 | sub $3, %ebx |
515 | lea 3(%edi), %edi |
516 | jnz L(StrncpyFillTailWithZero) |
517 | RETURN |
518 | |
519 | .p2align 4 |
520 | L(Exit4): |
521 | movl (%esi), %edx |
522 | movl %edx, (%edi) |
523 | # ifdef USE_AS_STPCPY |
524 | lea 3(%edi), %eax |
525 | # endif |
526 | sub $4, %ebx |
527 | lea 4(%edi), %edi |
528 | jnz L(StrncpyFillTailWithZero) |
529 | RETURN |
530 | |
531 | .p2align 4 |
532 | L(Exit5): |
533 | movl (%esi), %ecx |
534 | movb %dh, 4(%edi) |
535 | movl %ecx, (%edi) |
536 | # ifdef USE_AS_STPCPY |
537 | lea 4(%edi), %eax |
538 | # endif |
539 | sub $5, %ebx |
540 | lea 5(%edi), %edi |
541 | jnz L(StrncpyFillTailWithZero) |
542 | RETURN |
543 | |
544 | .p2align 4 |
545 | L(Exit6): |
546 | movl (%esi), %ecx |
547 | movw 4(%esi), %dx |
548 | movl %ecx, (%edi) |
549 | movw %dx, 4(%edi) |
550 | # ifdef USE_AS_STPCPY |
551 | lea 5(%edi), %eax |
552 | # endif |
553 | sub $6, %ebx |
554 | lea 6(%edi), %edi |
555 | jnz L(StrncpyFillTailWithZero) |
556 | RETURN |
557 | |
558 | .p2align 4 |
559 | L(Exit7): |
560 | movl (%esi), %ecx |
561 | movl 3(%esi), %edx |
562 | movl %ecx, (%edi) |
563 | movl %edx, 3(%edi) |
564 | # ifdef USE_AS_STPCPY |
565 | lea 6(%edi), %eax |
566 | # endif |
567 | sub $7, %ebx |
568 | lea 7(%edi), %edi |
569 | jnz L(StrncpyFillTailWithZero) |
570 | RETURN |
571 | |
572 | .p2align 4 |
573 | L(Exit8): |
574 | movlpd (%esi), %xmm0 |
575 | movlpd %xmm0, (%edi) |
576 | # ifdef USE_AS_STPCPY |
577 | lea 7(%edi), %eax |
578 | # endif |
579 | sub $8, %ebx |
580 | lea 8(%edi), %edi |
581 | jnz L(StrncpyFillTailWithZero) |
582 | RETURN |
583 | |
584 | .p2align 4 |
585 | L(Exit9): |
586 | movlpd (%esi), %xmm0 |
587 | movb %dh, 8(%edi) |
588 | movlpd %xmm0, (%edi) |
589 | # ifdef USE_AS_STPCPY |
590 | lea 8(%edi), %eax |
591 | # endif |
592 | sub $9, %ebx |
593 | lea 9(%edi), %edi |
594 | jnz L(StrncpyFillTailWithZero) |
595 | RETURN |
596 | |
597 | .p2align 4 |
598 | L(Exit10): |
599 | movlpd (%esi), %xmm0 |
600 | movw 8(%esi), %dx |
601 | movlpd %xmm0, (%edi) |
602 | movw %dx, 8(%edi) |
603 | # ifdef USE_AS_STPCPY |
604 | lea 9(%edi), %eax |
605 | # endif |
606 | sub $10, %ebx |
607 | lea 10(%edi), %edi |
608 | jnz L(StrncpyFillTailWithZero) |
609 | RETURN |
610 | |
611 | .p2align 4 |
612 | L(Exit11): |
613 | movlpd (%esi), %xmm0 |
614 | movl 7(%esi), %edx |
615 | movlpd %xmm0, (%edi) |
616 | movl %edx, 7(%edi) |
617 | # ifdef USE_AS_STPCPY |
618 | lea 10(%edi), %eax |
619 | # endif |
620 | sub $11, %ebx |
621 | lea 11(%edi), %edi |
622 | jnz L(StrncpyFillTailWithZero) |
623 | RETURN |
624 | |
625 | .p2align 4 |
626 | L(Exit12): |
627 | movlpd (%esi), %xmm0 |
628 | movl 8(%esi), %edx |
629 | movlpd %xmm0, (%edi) |
630 | movl %edx, 8(%edi) |
631 | # ifdef USE_AS_STPCPY |
632 | lea 11(%edi), %eax |
633 | # endif |
634 | sub $12, %ebx |
635 | lea 12(%edi), %edi |
636 | jnz L(StrncpyFillTailWithZero) |
637 | RETURN |
638 | |
639 | .p2align 4 |
640 | L(Exit13): |
641 | movlpd (%esi), %xmm0 |
642 | movlpd 5(%esi), %xmm1 |
643 | movlpd %xmm0, (%edi) |
644 | movlpd %xmm1, 5(%edi) |
645 | # ifdef USE_AS_STPCPY |
646 | lea 12(%edi), %eax |
647 | # endif |
648 | sub $13, %ebx |
649 | lea 13(%edi), %edi |
650 | jnz L(StrncpyFillTailWithZero) |
651 | RETURN |
652 | |
653 | .p2align 4 |
654 | L(Exit14): |
655 | movlpd (%esi), %xmm0 |
656 | movlpd 6(%esi), %xmm1 |
657 | movlpd %xmm0, (%edi) |
658 | movlpd %xmm1, 6(%edi) |
659 | # ifdef USE_AS_STPCPY |
660 | lea 13(%edi), %eax |
661 | # endif |
662 | sub $14, %ebx |
663 | lea 14(%edi), %edi |
664 | jnz L(StrncpyFillTailWithZero) |
665 | RETURN |
666 | |
667 | .p2align 4 |
668 | L(Exit15): |
669 | movlpd (%esi), %xmm0 |
670 | movlpd 7(%esi), %xmm1 |
671 | movlpd %xmm0, (%edi) |
672 | movlpd %xmm1, 7(%edi) |
673 | # ifdef USE_AS_STPCPY |
674 | lea 14(%edi), %eax |
675 | # endif |
676 | sub $15, %ebx |
677 | lea 15(%edi), %edi |
678 | jnz L(StrncpyFillTailWithZero) |
679 | RETURN |
680 | |
681 | .p2align 4 |
682 | L(Exit16): |
683 | movdqu (%esi), %xmm0 |
684 | movdqu %xmm0, (%edi) |
685 | # ifdef USE_AS_STPCPY |
686 | lea 15(%edi), %eax |
687 | # endif |
688 | sub $16, %ebx |
689 | lea 16(%edi), %edi |
690 | jnz L(StrncpyFillTailWithZero) |
691 | RETURN |
692 | |
693 | .p2align 4 |
694 | L(Exit17): |
695 | movdqu (%esi), %xmm0 |
696 | movdqu %xmm0, (%edi) |
697 | movb %dh, 16(%edi) |
698 | # ifdef USE_AS_STPCPY |
699 | lea 16(%edi), %eax |
700 | # endif |
701 | sub $17, %ebx |
702 | lea 17(%edi), %edi |
703 | jnz L(StrncpyFillTailWithZero) |
704 | RETURN |
705 | |
706 | .p2align 4 |
707 | L(Exit18): |
708 | movdqu (%esi), %xmm0 |
709 | movw 16(%esi), %cx |
710 | movdqu %xmm0, (%edi) |
711 | movw %cx, 16(%edi) |
712 | # ifdef USE_AS_STPCPY |
713 | lea 17(%edi), %eax |
714 | # endif |
715 | sub $18, %ebx |
716 | lea 18(%edi), %edi |
717 | jnz L(StrncpyFillTailWithZero) |
718 | RETURN |
719 | |
720 | .p2align 4 |
721 | L(Exit19): |
722 | movdqu (%esi), %xmm0 |
723 | movl 15(%esi), %ecx |
724 | movdqu %xmm0, (%edi) |
725 | movl %ecx, 15(%edi) |
726 | # ifdef USE_AS_STPCPY |
727 | lea 18(%edi), %eax |
728 | # endif |
729 | sub $19, %ebx |
730 | lea 19(%edi), %edi |
731 | jnz L(StrncpyFillTailWithZero) |
732 | RETURN |
733 | |
734 | .p2align 4 |
735 | L(Exit20): |
736 | movdqu (%esi), %xmm0 |
737 | movl 16(%esi), %ecx |
738 | movdqu %xmm0, (%edi) |
739 | movl %ecx, 16(%edi) |
740 | # ifdef USE_AS_STPCPY |
741 | lea 19(%edi), %eax |
742 | # endif |
743 | sub $20, %ebx |
744 | lea 20(%edi), %edi |
745 | jnz L(StrncpyFillTailWithZero) |
746 | RETURN |
747 | |
748 | .p2align 4 |
749 | L(Exit21): |
750 | movdqu (%esi), %xmm0 |
751 | movl 16(%esi), %ecx |
752 | movdqu %xmm0, (%edi) |
753 | movl %ecx, 16(%edi) |
754 | movb %dh, 20(%edi) |
755 | # ifdef USE_AS_STPCPY |
756 | lea 20(%edi), %eax |
757 | # endif |
758 | sub $21, %ebx |
759 | lea 21(%edi), %edi |
760 | jnz L(StrncpyFillTailWithZero) |
761 | RETURN |
762 | |
763 | .p2align 4 |
764 | L(Exit22): |
765 | movdqu (%esi), %xmm0 |
766 | movlpd 14(%esi), %xmm3 |
767 | movdqu %xmm0, (%edi) |
768 | movlpd %xmm3, 14(%edi) |
769 | # ifdef USE_AS_STPCPY |
770 | lea 21(%edi), %eax |
771 | # endif |
772 | sub $22, %ebx |
773 | lea 22(%edi), %edi |
774 | jnz L(StrncpyFillTailWithZero) |
775 | RETURN |
776 | |
777 | .p2align 4 |
778 | L(Exit23): |
779 | movdqu (%esi), %xmm0 |
780 | movlpd 15(%esi), %xmm3 |
781 | movdqu %xmm0, (%edi) |
782 | movlpd %xmm3, 15(%edi) |
783 | # ifdef USE_AS_STPCPY |
784 | lea 22(%edi), %eax |
785 | # endif |
786 | sub $23, %ebx |
787 | lea 23(%edi), %edi |
788 | jnz L(StrncpyFillTailWithZero) |
789 | RETURN |
790 | |
791 | .p2align 4 |
792 | L(Exit24): |
793 | movdqu (%esi), %xmm0 |
794 | movlpd 16(%esi), %xmm2 |
795 | movdqu %xmm0, (%edi) |
796 | movlpd %xmm2, 16(%edi) |
797 | # ifdef USE_AS_STPCPY |
798 | lea 23(%edi), %eax |
799 | # endif |
800 | sub $24, %ebx |
801 | lea 24(%edi), %edi |
802 | jnz L(StrncpyFillTailWithZero) |
803 | RETURN |
804 | |
805 | .p2align 4 |
806 | L(Exit25): |
807 | movdqu (%esi), %xmm0 |
808 | movlpd 16(%esi), %xmm2 |
809 | movdqu %xmm0, (%edi) |
810 | movlpd %xmm2, 16(%edi) |
811 | movb %dh, 24(%edi) |
812 | # ifdef USE_AS_STPCPY |
813 | lea 24(%edi), %eax |
814 | # endif |
815 | sub $25, %ebx |
816 | lea 25(%edi), %edi |
817 | jnz L(StrncpyFillTailWithZero) |
818 | RETURN |
819 | |
820 | .p2align 4 |
821 | L(Exit26): |
822 | movdqu (%esi), %xmm0 |
823 | movlpd 16(%esi), %xmm2 |
824 | movw 24(%esi), %cx |
825 | movdqu %xmm0, (%edi) |
826 | movlpd %xmm2, 16(%edi) |
827 | movw %cx, 24(%edi) |
828 | # ifdef USE_AS_STPCPY |
829 | lea 25(%edi), %eax |
830 | # endif |
831 | sub $26, %ebx |
832 | lea 26(%edi), %edi |
833 | jnz L(StrncpyFillTailWithZero) |
834 | RETURN |
835 | |
836 | .p2align 4 |
837 | L(Exit27): |
838 | movdqu (%esi), %xmm0 |
839 | movlpd 16(%esi), %xmm2 |
840 | movl 23(%esi), %ecx |
841 | movdqu %xmm0, (%edi) |
842 | movlpd %xmm2, 16(%edi) |
843 | movl %ecx, 23(%edi) |
844 | # ifdef USE_AS_STPCPY |
845 | lea 26(%edi), %eax |
846 | # endif |
847 | sub $27, %ebx |
848 | lea 27(%edi), %edi |
849 | jnz L(StrncpyFillTailWithZero) |
850 | RETURN |
851 | |
852 | .p2align 4 |
853 | L(Exit28): |
854 | movdqu (%esi), %xmm0 |
855 | movlpd 16(%esi), %xmm2 |
856 | movl 24(%esi), %ecx |
857 | movdqu %xmm0, (%edi) |
858 | movlpd %xmm2, 16(%edi) |
859 | movl %ecx, 24(%edi) |
860 | # ifdef USE_AS_STPCPY |
861 | lea 27(%edi), %eax |
862 | # endif |
863 | sub $28, %ebx |
864 | lea 28(%edi), %edi |
865 | jnz L(StrncpyFillTailWithZero) |
866 | RETURN |
867 | |
868 | .p2align 4 |
869 | L(Exit29): |
870 | movdqu (%esi), %xmm0 |
871 | movdqu 13(%esi), %xmm2 |
872 | movdqu %xmm0, (%edi) |
873 | movdqu %xmm2, 13(%edi) |
874 | # ifdef USE_AS_STPCPY |
875 | lea 28(%edi), %eax |
876 | # endif |
877 | sub $29, %ebx |
878 | lea 29(%edi), %edi |
879 | jnz L(StrncpyFillTailWithZero) |
880 | RETURN |
881 | |
882 | .p2align 4 |
883 | L(Exit30): |
884 | movdqu (%esi), %xmm0 |
885 | movdqu 14(%esi), %xmm2 |
886 | movdqu %xmm0, (%edi) |
887 | movdqu %xmm2, 14(%edi) |
888 | # ifdef USE_AS_STPCPY |
889 | lea 29(%edi), %eax |
890 | # endif |
891 | sub $30, %ebx |
892 | lea 30(%edi), %edi |
893 | jnz L(StrncpyFillTailWithZero) |
894 | RETURN |
895 | |
896 | |
897 | .p2align 4 |
898 | L(Exit31): |
899 | movdqu (%esi), %xmm0 |
900 | movdqu 15(%esi), %xmm2 |
901 | movdqu %xmm0, (%edi) |
902 | movdqu %xmm2, 15(%edi) |
903 | # ifdef USE_AS_STPCPY |
904 | lea 30(%edi), %eax |
905 | # endif |
906 | sub $31, %ebx |
907 | lea 31(%edi), %edi |
908 | jnz L(StrncpyFillTailWithZero) |
909 | RETURN |
910 | |
911 | .p2align 4 |
912 | L(Exit32): |
913 | movdqu (%esi), %xmm0 |
914 | movdqu 16(%esi), %xmm2 |
915 | movdqu %xmm0, (%edi) |
916 | movdqu %xmm2, 16(%edi) |
917 | # ifdef USE_AS_STPCPY |
918 | lea 31(%edi), %eax |
919 | # endif |
920 | sub $32, %ebx |
921 | lea 32(%edi), %edi |
922 | jnz L(StrncpyFillTailWithZero) |
923 | RETURN |
924 | |
925 | .p2align 4 |
926 | L(StrncpyExit1): |
927 | movb (%esi), %dl |
928 | movb %dl, (%edi) |
929 | # ifdef USE_AS_STPCPY |
930 | lea 1(%edi), %eax |
931 | # endif |
932 | RETURN |
933 | |
934 | .p2align 4 |
935 | L(StrncpyExit2): |
936 | movw (%esi), %dx |
937 | movw %dx, (%edi) |
938 | # ifdef USE_AS_STPCPY |
939 | lea 2(%edi), %eax |
940 | # endif |
941 | RETURN |
942 | |
943 | .p2align 4 |
944 | L(StrncpyExit3): |
945 | movw (%esi), %cx |
946 | movb 2(%esi), %dl |
947 | movw %cx, (%edi) |
948 | movb %dl, 2(%edi) |
949 | # ifdef USE_AS_STPCPY |
950 | lea 3(%edi), %eax |
951 | # endif |
952 | RETURN |
953 | |
954 | .p2align 4 |
955 | L(StrncpyExit4): |
956 | movl (%esi), %edx |
957 | movl %edx, (%edi) |
958 | # ifdef USE_AS_STPCPY |
959 | lea 4(%edi), %eax |
960 | # endif |
961 | RETURN |
962 | |
963 | .p2align 4 |
964 | L(StrncpyExit5): |
965 | movl (%esi), %ecx |
966 | movb 4(%esi), %dl |
967 | movl %ecx, (%edi) |
968 | movb %dl, 4(%edi) |
969 | # ifdef USE_AS_STPCPY |
970 | lea 5(%edi), %eax |
971 | # endif |
972 | RETURN |
973 | |
974 | .p2align 4 |
975 | L(StrncpyExit6): |
976 | movl (%esi), %ecx |
977 | movw 4(%esi), %dx |
978 | movl %ecx, (%edi) |
979 | movw %dx, 4(%edi) |
980 | # ifdef USE_AS_STPCPY |
981 | lea 6(%edi), %eax |
982 | # endif |
983 | RETURN |
984 | |
985 | .p2align 4 |
986 | L(StrncpyExit7): |
987 | movl (%esi), %ecx |
988 | movl 3(%esi), %edx |
989 | movl %ecx, (%edi) |
990 | movl %edx, 3(%edi) |
991 | # ifdef USE_AS_STPCPY |
992 | lea 7(%edi), %eax |
993 | # endif |
994 | RETURN |
995 | |
996 | .p2align 4 |
997 | L(StrncpyExit8): |
998 | movlpd (%esi), %xmm0 |
999 | movlpd %xmm0, (%edi) |
1000 | # ifdef USE_AS_STPCPY |
1001 | lea 8(%edi), %eax |
1002 | # endif |
1003 | RETURN |
1004 | |
1005 | .p2align 4 |
1006 | L(StrncpyExit9): |
1007 | movlpd (%esi), %xmm0 |
1008 | movb 8(%esi), %dl |
1009 | movlpd %xmm0, (%edi) |
1010 | movb %dl, 8(%edi) |
1011 | # ifdef USE_AS_STPCPY |
1012 | lea 9(%edi), %eax |
1013 | # endif |
1014 | RETURN |
1015 | |
1016 | .p2align 4 |
1017 | L(StrncpyExit10): |
1018 | movlpd (%esi), %xmm0 |
1019 | movw 8(%esi), %dx |
1020 | movlpd %xmm0, (%edi) |
1021 | movw %dx, 8(%edi) |
1022 | # ifdef USE_AS_STPCPY |
1023 | lea 10(%edi), %eax |
1024 | # endif |
1025 | RETURN |
1026 | |
1027 | .p2align 4 |
1028 | L(StrncpyExit11): |
1029 | movlpd (%esi), %xmm0 |
1030 | movl 7(%esi), %edx |
1031 | movlpd %xmm0, (%edi) |
1032 | movl %edx, 7(%edi) |
1033 | # ifdef USE_AS_STPCPY |
1034 | lea 11(%edi), %eax |
1035 | # endif |
1036 | RETURN |
1037 | |
1038 | .p2align 4 |
1039 | L(StrncpyExit12): |
1040 | movlpd (%esi), %xmm0 |
1041 | movl 8(%esi), %edx |
1042 | movlpd %xmm0, (%edi) |
1043 | movl %edx, 8(%edi) |
1044 | # ifdef USE_AS_STPCPY |
1045 | lea 12(%edi), %eax |
1046 | # endif |
1047 | RETURN |
1048 | |
1049 | .p2align 4 |
1050 | L(StrncpyExit13): |
1051 | movlpd (%esi), %xmm0 |
1052 | movlpd 5(%esi), %xmm1 |
1053 | movlpd %xmm0, (%edi) |
1054 | movlpd %xmm1, 5(%edi) |
1055 | # ifdef USE_AS_STPCPY |
1056 | lea 13(%edi), %eax |
1057 | # endif |
1058 | RETURN |
1059 | |
1060 | .p2align 4 |
1061 | L(StrncpyExit14): |
1062 | movlpd (%esi), %xmm0 |
1063 | movlpd 6(%esi), %xmm1 |
1064 | movlpd %xmm0, (%edi) |
1065 | movlpd %xmm1, 6(%edi) |
1066 | # ifdef USE_AS_STPCPY |
1067 | lea 14(%edi), %eax |
1068 | # endif |
1069 | RETURN |
1070 | |
1071 | .p2align 4 |
1072 | L(StrncpyExit15): |
1073 | movlpd (%esi), %xmm0 |
1074 | movlpd 7(%esi), %xmm1 |
1075 | movlpd %xmm0, (%edi) |
1076 | movlpd %xmm1, 7(%edi) |
1077 | # ifdef USE_AS_STPCPY |
1078 | lea 15(%edi), %eax |
1079 | # endif |
1080 | RETURN |
1081 | |
1082 | .p2align 4 |
1083 | L(StrncpyExit16): |
1084 | movdqu (%esi), %xmm0 |
1085 | movdqu %xmm0, (%edi) |
1086 | # ifdef USE_AS_STPCPY |
1087 | lea 16(%edi), %eax |
1088 | # endif |
1089 | RETURN |
1090 | |
1091 | .p2align 4 |
1092 | L(StrncpyExit17): |
1093 | movdqu (%esi), %xmm0 |
1094 | movb 16(%esi), %cl |
1095 | movdqu %xmm0, (%edi) |
1096 | movb %cl, 16(%edi) |
1097 | # ifdef USE_AS_STPCPY |
1098 | lea 17(%edi), %eax |
1099 | # endif |
1100 | RETURN |
1101 | |
1102 | .p2align 4 |
1103 | L(StrncpyExit18): |
1104 | movdqu (%esi), %xmm0 |
1105 | movw 16(%esi), %cx |
1106 | movdqu %xmm0, (%edi) |
1107 | movw %cx, 16(%edi) |
1108 | # ifdef USE_AS_STPCPY |
1109 | lea 18(%edi), %eax |
1110 | # endif |
1111 | RETURN |
1112 | |
1113 | .p2align 4 |
1114 | L(StrncpyExit19): |
1115 | movdqu (%esi), %xmm0 |
1116 | movl 15(%esi), %ecx |
1117 | movdqu %xmm0, (%edi) |
1118 | movl %ecx, 15(%edi) |
1119 | # ifdef USE_AS_STPCPY |
1120 | lea 19(%edi), %eax |
1121 | # endif |
1122 | RETURN |
1123 | |
1124 | .p2align 4 |
1125 | L(StrncpyExit20): |
1126 | movdqu (%esi), %xmm0 |
1127 | movl 16(%esi), %ecx |
1128 | movdqu %xmm0, (%edi) |
1129 | movl %ecx, 16(%edi) |
1130 | # ifdef USE_AS_STPCPY |
1131 | lea 20(%edi), %eax |
1132 | # endif |
1133 | RETURN |
1134 | |
1135 | .p2align 4 |
1136 | L(StrncpyExit21): |
1137 | movdqu (%esi), %xmm0 |
1138 | movl 16(%esi), %ecx |
1139 | movb 20(%esi), %dl |
1140 | movdqu %xmm0, (%edi) |
1141 | movl %ecx, 16(%edi) |
1142 | movb %dl, 20(%edi) |
1143 | # ifdef USE_AS_STPCPY |
1144 | lea 21(%edi), %eax |
1145 | # endif |
1146 | RETURN |
1147 | |
1148 | .p2align 4 |
1149 | L(StrncpyExit22): |
1150 | movdqu (%esi), %xmm0 |
1151 | movlpd 14(%esi), %xmm3 |
1152 | movdqu %xmm0, (%edi) |
1153 | movlpd %xmm3, 14(%edi) |
1154 | # ifdef USE_AS_STPCPY |
1155 | lea 22(%edi), %eax |
1156 | # endif |
1157 | RETURN |
1158 | |
	.p2align 4
/* L(StrncpyExitNN): copy exactly NN remaining bytes from %esi to %edi
   using a minimal number of (possibly overlapping) unaligned loads and
   stores.  For stpncpy (USE_AS_STPCPY) the return value dst + NN is
   left in %eax.  */
L(StrncpyExit23):
	movdqu (%esi), %xmm0
	movlpd 15(%esi), %xmm3	/* bytes 15..22; overlaps byte 15 above */
	movdqu %xmm0, (%edi)
	movlpd %xmm3, 15(%edi)
# ifdef USE_AS_STPCPY
	lea 22(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit24):
	movdqu (%esi), %xmm0
	movlpd 16(%esi), %xmm2
	movdqu %xmm0, (%edi)
	movlpd %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
	lea 24(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit25):
	movdqu (%esi), %xmm0
	movlpd 16(%esi), %xmm2
	movb 24(%esi), %cl	/* single trailing byte via %cl */
	movdqu %xmm0, (%edi)
	movlpd %xmm2, 16(%edi)
	movb %cl, 24(%edi)
# ifdef USE_AS_STPCPY
	lea 25(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit26):
	movdqu (%esi), %xmm0
	movlpd 16(%esi), %xmm2
	movw 24(%esi), %cx	/* two trailing bytes via %cx */
	movdqu %xmm0, (%edi)
	movlpd %xmm2, 16(%edi)
	movw %cx, 24(%edi)
# ifdef USE_AS_STPCPY
	lea 26(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit27):
	movdqu (%esi), %xmm0
	movlpd 16(%esi), %xmm2
	movl 23(%esi), %ecx	/* bytes 23..26; overlaps byte 23 above */
	movdqu %xmm0, (%edi)
	movlpd %xmm2, 16(%edi)
	movl %ecx, 23(%edi)
# ifdef USE_AS_STPCPY
	lea 27(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit28):
	movdqu (%esi), %xmm0
	movlpd 16(%esi), %xmm2
	movl 24(%esi), %ecx
	movdqu %xmm0, (%edi)
	movlpd %xmm2, 16(%edi)
	movl %ecx, 24(%edi)
# ifdef USE_AS_STPCPY
	lea 28(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit29):
	movdqu (%esi), %xmm0
	movdqu 13(%esi), %xmm2	/* bytes 13..28; overlaps bytes 13..15 */
	movdqu %xmm0, (%edi)
	movdqu %xmm2, 13(%edi)
# ifdef USE_AS_STPCPY
	lea 29(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit30):
	movdqu (%esi), %xmm0
	movdqu 14(%esi), %xmm2
	movdqu %xmm0, (%edi)
	movdqu %xmm2, 14(%edi)
# ifdef USE_AS_STPCPY
	lea 30(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit31):
	movdqu (%esi), %xmm0
	movdqu 15(%esi), %xmm2
	movdqu %xmm0, (%edi)
	movdqu %xmm2, 15(%edi)
# ifdef USE_AS_STPCPY
	lea 31(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit32):
	movdqu (%esi), %xmm0
	movdqu 16(%esi), %xmm2
	movdqu %xmm0, (%edi)
	movdqu %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
	lea 32(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit33):
	movdqu (%esi), %xmm0
	movdqu 16(%esi), %xmm2
	movb 32(%esi), %cl
	movdqu %xmm0, (%edi)
	movdqu %xmm2, 16(%edi)
	movb %cl, 32(%edi)
	/* NOTE(review): unlike the other exits there is no USE_AS_STPCPY
	   return-value update here; presumably %eax was already set on the
	   paths that reach this entry -- confirm against the dispatch
	   sites of L(ExitStrncpyTable).  */
	RETURN
1286 | |
	.p2align 4
/* L(FillN): store N zero bytes at %edi (the strncpy tail padding).
   On entry %edx is zero and %xmm0 is all-zero.  Some variants store
   a wider unit starting at -1(%edi): the byte before %edi has already
   been zeroed (it is the copied null terminator or earlier padding),
   so the one-byte overlap is harmless and saves an instruction.  */
L(Fill0):
	RETURN

	.p2align 4
L(Fill1):
	movb %dl, (%edi)
	RETURN

	.p2align 4
L(Fill2):
	movw %dx, (%edi)
	RETURN

	.p2align 4
L(Fill3):
	movl %edx, -1(%edi)	/* 4-byte store covering bytes -1..2 */
	RETURN

	.p2align 4
L(Fill4):
	movl %edx, (%edi)
	RETURN

	.p2align 4
L(Fill5):
	movl %edx, (%edi)
	movb %dl, 4(%edi)
	RETURN

	.p2align 4
L(Fill6):
	movl %edx, (%edi)
	movw %dx, 4(%edi)
	RETURN

	.p2align 4
L(Fill7):
	movlpd %xmm0, -1(%edi)	/* 8-byte store covering bytes -1..6 */
	RETURN

	.p2align 4
L(Fill8):
	movlpd %xmm0, (%edi)
	RETURN

	.p2align 4
L(Fill9):
	movlpd %xmm0, (%edi)
	movb %dl, 8(%edi)
	RETURN

	.p2align 4
L(Fill10):
	movlpd %xmm0, (%edi)
	movw %dx, 8(%edi)
	RETURN

	.p2align 4
L(Fill11):
	movlpd %xmm0, (%edi)
	movl %edx, 7(%edi)	/* overlaps byte 7 of the 8-byte store */
	RETURN

	.p2align 4
L(Fill12):
	movlpd %xmm0, (%edi)
	movl %edx, 8(%edi)
	RETURN

	.p2align 4
L(Fill13):
	movlpd %xmm0, (%edi)
	movlpd %xmm0, 5(%edi)	/* overlapping 8-byte stores: 13 bytes */
	RETURN

	.p2align 4
L(Fill14):
	movlpd %xmm0, (%edi)
	movlpd %xmm0, 6(%edi)
	RETURN

	.p2align 4
L(Fill15):
	movdqu %xmm0, -1(%edi)	/* 16-byte store covering bytes -1..14 */
	RETURN

	.p2align 4
L(Fill16):
	movdqu %xmm0, (%edi)
	RETURN
1378 | |
1379 | .p2align 4 |
1380 | L(CopyFrom1To16BytesUnalignedXmm2): |
1381 | movdqu %xmm2, (%edi, %ecx) |
1382 | |
1383 | .p2align 4 |
1384 | L(CopyFrom1To16BytesXmmExit): |
1385 | bsf %edx, %edx |
1386 | add $15, %ebx |
1387 | add %ecx, %edi |
1388 | # ifdef USE_AS_STPCPY |
1389 | lea (%edi, %edx), %eax |
1390 | # endif |
1391 | sub %edx, %ebx |
1392 | lea 1(%edi, %edx), %edi |
1393 | |
1394 | .p2align 4 |
1395 | L(StrncpyFillTailWithZero): |
1396 | pxor %xmm0, %xmm0 |
1397 | xor %edx, %edx |
1398 | sub $16, %ebx |
1399 | jbe L(StrncpyFillExit) |
1400 | |
1401 | movdqu %xmm0, (%edi) |
1402 | add $16, %edi |
1403 | |
1404 | mov %edi, %esi |
1405 | and $0xf, %esi |
1406 | sub %esi, %edi |
1407 | add %esi, %ebx |
1408 | sub $64, %ebx |
1409 | jb L(StrncpyFillLess64) |
1410 | |
1411 | L(StrncpyFillLoopMovdqa): |
1412 | movdqa %xmm0, (%edi) |
1413 | movdqa %xmm0, 16(%edi) |
1414 | movdqa %xmm0, 32(%edi) |
1415 | movdqa %xmm0, 48(%edi) |
1416 | add $64, %edi |
1417 | sub $64, %ebx |
1418 | jae L(StrncpyFillLoopMovdqa) |
1419 | |
1420 | L(StrncpyFillLess64): |
1421 | add $32, %ebx |
1422 | jl L(StrncpyFillLess32) |
1423 | movdqa %xmm0, (%edi) |
1424 | movdqa %xmm0, 16(%edi) |
1425 | add $32, %edi |
1426 | sub $16, %ebx |
1427 | jl L(StrncpyFillExit) |
1428 | movdqa %xmm0, (%edi) |
1429 | add $16, %edi |
1430 | BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) |
1431 | |
1432 | L(StrncpyFillLess32): |
1433 | add $16, %ebx |
1434 | jl L(StrncpyFillExit) |
1435 | movdqa %xmm0, (%edi) |
1436 | add $16, %edi |
1437 | BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) |
1438 | |
1439 | L(StrncpyFillExit): |
1440 | add $16, %ebx |
1441 | BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) |
1442 | |
	.p2align 4
/* Leave path of the unaligned 64-byte strncpy loop.  %xmm4..%xmm7 hold
   the four pending 16-byte source chunks; %edx is the null mask of the
   first chunk (zero if no null was seen) and %ebx the biased remaining
   count.  Case 3: length limit reached with no null.  Case 2: a null
   byte was found within the limit.  */
L(UnalignedLeaveCase2OrCase3):
	test %edx, %edx
	jnz L(Unaligned64LeaveCase2)
L(Unaligned64LeaveCase3):
	/* No null: flush as many whole 16-byte chunks as the remaining
	   count allows, then finish the partial chunk (if any) in
	   L(CopyFrom1To16BytesCase3).  %ecx = remaining count rounded
	   down to 16.  */
	lea 64(%ebx), %ecx
	and $-16, %ecx
	add $48, %ebx
	jl L(CopyFrom1To16BytesCase3)
	movdqu %xmm4, (%edi)
	sub $16, %ebx
	jb L(CopyFrom1To16BytesCase3)
	movdqu %xmm5, 16(%edi)
	sub $16, %ebx
	jb L(CopyFrom1To16BytesCase3)
	movdqu %xmm6, 32(%edi)
	sub $16, %ebx
	jb L(CopyFrom1To16BytesCase3)
	movdqu %xmm7, 48(%edi)
# ifdef USE_AS_STPCPY
	lea 64(%edi), %eax	/* exactly 64 bytes copied, no null */
# endif
	RETURN

	.p2align 4
L(Unaligned64LeaveCase2):
	/* A null lies somewhere in the 64 bytes; re-scan chunk by chunk,
	   honoring the remaining count (%ebx) as we go.  %ecx tracks the
	   byte offset of the current chunk.  */
	xor %ecx, %ecx
	pcmpeqb %xmm4, %xmm0
	pmovmskb %xmm0, %edx
	add $48, %ebx
	jle L(CopyFrom1To16BytesCase2OrCase3)
	test %edx, %edx
	jnz L(CopyFrom1To16BytesUnalignedXmm4)

	pcmpeqb %xmm5, %xmm0
	pmovmskb %xmm0, %edx
	movdqu %xmm4, (%edi)	/* chunk 0 is null-free: store it */
	add $16, %ecx
	sub $16, %ebx
	jbe L(CopyFrom1To16BytesCase2OrCase3)
	test %edx, %edx
	jnz L(CopyFrom1To16BytesUnalignedXmm5)

	pcmpeqb %xmm6, %xmm0
	pmovmskb %xmm0, %edx
	movdqu %xmm5, 16(%edi)
	add $16, %ecx
	sub $16, %ebx
	jbe L(CopyFrom1To16BytesCase2OrCase3)
	test %edx, %edx
	jnz L(CopyFrom1To16BytesUnalignedXmm6)

	pcmpeqb %xmm7, %xmm0
	pmovmskb %xmm0, %edx
	movdqu %xmm6, 32(%edi)
	lea 16(%edi, %ecx), %edi	/* advance to the last chunk */
	lea 16(%esi, %ecx), %esi
	bsf %edx, %edx		/* null index within the last chunk */
	cmp %ebx, %edx
	jb L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
1504 | |
	.p2align 4
/* n == 0: nothing to copy.  strncpy and stpncpy both return dst.  */
L(ExitZero):
	movl %edi, %eax
	RETURN

END (STRCPY)
1511 | |
	.p2align 4
	/* NOTE(review): this .p2align precedes the section switch, so it
	   pads the current section rather than the table below -- the
	   table relies on the natural alignment of .int entries; confirm
	   whether the directives were meant in the other order.  */
	.section .rodata
/* Dispatch table used via BRANCH_TO_JMPTBL_ENTRY: entry i (0-based)
   jumps to L(Exit(i+1)), i.e. it is indexed by the terminator position
   within the final chunk.  With PIC, JMPTBL stores table-relative
   offsets; otherwise absolute addresses.  */
L(ExitTable):
	.int JMPTBL(L(Exit1), L(ExitTable))
	.int JMPTBL(L(Exit2), L(ExitTable))
	.int JMPTBL(L(Exit3), L(ExitTable))
	.int JMPTBL(L(Exit4), L(ExitTable))
	.int JMPTBL(L(Exit5), L(ExitTable))
	.int JMPTBL(L(Exit6), L(ExitTable))
	.int JMPTBL(L(Exit7), L(ExitTable))
	.int JMPTBL(L(Exit8), L(ExitTable))
	.int JMPTBL(L(Exit9), L(ExitTable))
	.int JMPTBL(L(Exit10), L(ExitTable))
	.int JMPTBL(L(Exit11), L(ExitTable))
	.int JMPTBL(L(Exit12), L(ExitTable))
	.int JMPTBL(L(Exit13), L(ExitTable))
	.int JMPTBL(L(Exit14), L(ExitTable))
	.int JMPTBL(L(Exit15), L(ExitTable))
	.int JMPTBL(L(Exit16), L(ExitTable))
	.int JMPTBL(L(Exit17), L(ExitTable))
	.int JMPTBL(L(Exit18), L(ExitTable))
	.int JMPTBL(L(Exit19), L(ExitTable))
	.int JMPTBL(L(Exit20), L(ExitTable))
	.int JMPTBL(L(Exit21), L(ExitTable))
	.int JMPTBL(L(Exit22), L(ExitTable))
	.int JMPTBL(L(Exit23), L(ExitTable))
	.int JMPTBL(L(Exit24), L(ExitTable))
	.int JMPTBL(L(Exit25), L(ExitTable))
	.int JMPTBL(L(Exit26), L(ExitTable))
	.int JMPTBL(L(Exit27), L(ExitTable))
	.int JMPTBL(L(Exit28), L(ExitTable))
	.int JMPTBL(L(Exit29), L(ExitTable))
	.int JMPTBL(L(Exit30), L(ExitTable))
	.int JMPTBL(L(Exit31), L(ExitTable))
	.int JMPTBL(L(Exit32), L(ExitTable))
1547 | |
/* Dispatch table for the strncpy tails: entry i jumps to the handler
   that copies exactly i remaining bytes (0..33).  Entry 0 reuses
   L(Exit0); the table must list exactly 34 consecutive entries to
   match BRANCH_TO_JMPTBL_ENTRY indexing.  */
L(ExitStrncpyTable):
	.int JMPTBL(L(Exit0), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
	.int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1583 | |
	.p2align 4
/* Dispatch table for the zero-fill tails: entry i jumps to L(Filli),
   which stores exactly i zero bytes (0..16) at %edi.  */
L(FillTable):
	.int JMPTBL(L(Fill0), L(FillTable))
	.int JMPTBL(L(Fill1), L(FillTable))
	.int JMPTBL(L(Fill2), L(FillTable))
	.int JMPTBL(L(Fill3), L(FillTable))
	.int JMPTBL(L(Fill4), L(FillTable))
	.int JMPTBL(L(Fill5), L(FillTable))
	.int JMPTBL(L(Fill6), L(FillTable))
	.int JMPTBL(L(Fill7), L(FillTable))
	.int JMPTBL(L(Fill8), L(FillTable))
	.int JMPTBL(L(Fill9), L(FillTable))
	.int JMPTBL(L(Fill10), L(FillTable))
	.int JMPTBL(L(Fill11), L(FillTable))
	.int JMPTBL(L(Fill12), L(FillTable))
	.int JMPTBL(L(Fill13), L(FillTable))
	.int JMPTBL(L(Fill14), L(FillTable))
	.int JMPTBL(L(Fill15), L(FillTable))
	.int JMPTBL(L(Fill16), L(FillTable))
# else

/* Plain strcpy/stpcpy (no length argument).  Only %edi needs to be
   saved once the short-string fast path is passed; the L(ExitTail*)
   paths run before any push and return with RETURN1 (plain ret).  */
# define PARMS 4
# define ENTRANCE
# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
# define RETURN1 ret

	.text
ENTRY (STRCPY)
	ENTRANCE
	mov STR1(%esp), %edx	/* %edx = dst */
	mov STR2(%esp), %ecx	/* %ecx = src */

/* Handle strings of up to 16 bytes with byte-by-byte null checks,
   before saving any registers.  */
	cmpb $0, (%ecx)
	jz L(ExitTail1)
	cmpb $0, 1(%ecx)
	jz L(ExitTail2)
	cmpb $0, 2(%ecx)
	jz L(ExitTail3)
	cmpb $0, 3(%ecx)
	jz L(ExitTail4)
	cmpb $0, 4(%ecx)
	jz L(ExitTail5)
	cmpb $0, 5(%ecx)
	jz L(ExitTail6)
	cmpb $0, 6(%ecx)
	jz L(ExitTail7)
	cmpb $0, 7(%ecx)
	jz L(ExitTail8)
	cmpb $0, 8(%ecx)
	jz L(ExitTail9)
	cmpb $0, 9(%ecx)
	jz L(ExitTail10)
	cmpb $0, 10(%ecx)
	jz L(ExitTail11)
	cmpb $0, 11(%ecx)
	jz L(ExitTail12)
	cmpb $0, 12(%ecx)
	jz L(ExitTail13)
	cmpb $0, 13(%ecx)
	jz L(ExitTail14)
	cmpb $0, 14(%ecx)
	jz L(ExitTail15)
	cmpb $0, 15(%ecx)
	jz L(ExitTail16)

	PUSH (%edi)
	PUSH (%ebx)

	mov %edx, %edi		/* keep original dst for the return value */
	lea 16(%ecx), %ebx
	and $-16, %ebx		/* %ebx = first 16-aligned addr above src */
	pxor %xmm0, %xmm0
	movdqu (%ecx), %xmm1	/* first 16 bytes are known null-free */
	movdqu %xmm1, (%edx)
	pcmpeqb (%ebx), %xmm0	/* scan first aligned chunk for a null */
	pmovmskb %xmm0, %eax
	sub %ecx, %ebx		/* %ebx = byte offset of that chunk */
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	mov %ecx, %eax
	lea 16(%ecx), %ecx
	and $-16, %ecx		/* align src up to 16 ...  */
	sub %ecx, %eax
	sub %eax, %edx		/* ... and shift dst by the same delta */
	xor %ebx, %ebx		/* restart the chunk offset at 0 */
1669 | |
	.p2align 4
/* Unrolled copy: each step loads the next aligned 16-byte source
   chunk, checks it for the terminating null, and stores the previous
   (already verified) chunk.  %ebx is the running byte offset from
   %ecx/%edx and grows by 16 per step.  */
	movdqa (%ecx), %xmm1
	movaps 16(%ecx), %xmm2
	movdqu %xmm1, (%edx)
	pcmpeqb %xmm2, %xmm0
	pmovmskb %xmm0, %eax
	add $16, %ebx
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	movaps 16(%ecx, %ebx), %xmm3
	movdqu %xmm2, (%edx, %ebx)
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %eax
	add $16, %ebx
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	movaps 16(%ecx, %ebx), %xmm4
	movdqu %xmm3, (%edx, %ebx)
	pcmpeqb %xmm4, %xmm0
	pmovmskb %xmm0, %eax
	add $16, %ebx
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	movaps 16(%ecx, %ebx), %xmm1
	movdqu %xmm4, (%edx, %ebx)
	pcmpeqb %xmm1, %xmm0
	pmovmskb %xmm0, %eax
	add $16, %ebx
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	movaps 16(%ecx, %ebx), %xmm2
	movdqu %xmm1, (%edx, %ebx)
	pcmpeqb %xmm2, %xmm0
	pmovmskb %xmm0, %eax
	add $16, %ebx
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	movaps 16(%ecx, %ebx), %xmm3
	movdqu %xmm2, (%edx, %ebx)
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %eax
	add $16, %ebx
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	/* No null in the first 7 chunks; flush the last verified chunk
	   and realign src down to 64 for the main loop.  %ebx is 96
	   (six adds of 16) from here on.  */
	movdqu %xmm3, (%edx, %ebx)
	mov %ecx, %eax
	lea 16(%ecx, %ebx), %ecx
	and $-0x40, %ecx	/* align src down to 64 ...  */
	sub %ecx, %eax
	sub %eax, %edx		/* ... and shift dst by the same delta */
1726 | |
/* Main loop: read 64 aligned source bytes per iteration into
   %xmm4/%xmm5/%xmm6/%xmm7 and detect a null anywhere in them with two
   pminub reductions (the min of all four chunks has a zero byte iff
   any chunk does).  Stores are deferred one iteration so only verified
   data is written.  */
L(Aligned64Loop):
	movaps (%ecx), %xmm2
	movaps %xmm2, %xmm4	/* keep unmodified copies for the store */
	movaps 16(%ecx), %xmm5
	movaps 32(%ecx), %xmm3
	movaps %xmm3, %xmm6
	movaps 48(%ecx), %xmm7
	pminub %xmm5, %xmm2
	add $64, %ecx
	pminub %xmm7, %xmm3
	add $64, %edx
	pminub %xmm2, %xmm3	/* byte-wise min of all four chunks */
	pcmpeqb %xmm0, %xmm3
	pmovmskb %xmm3, %eax
	test %eax, %eax
	jnz L(Aligned64Leave)
L(Aligned64Loop_start):
	movdqu %xmm4, -64(%edx)	/* flush previous iteration's 64 bytes */
	movaps (%ecx), %xmm2
	movdqa %xmm2, %xmm4
	movdqu %xmm5, -48(%edx)
	movaps 16(%ecx), %xmm5
	pminub %xmm5, %xmm2
	movaps 32(%ecx), %xmm3
	movdqu %xmm6, -32(%edx)
	movaps %xmm3, %xmm6
	movdqu %xmm7, -16(%edx)
	movaps 48(%ecx), %xmm7
	pminub %xmm7, %xmm3
	pminub %xmm2, %xmm3
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %eax
	add $64, %edx
	add $64, %ecx
	test %eax, %eax
	jz L(Aligned64Loop_start)
L(Aligned64Leave):
	/* A null lies in one of %xmm4..%xmm7.  %ebx has been 96 since
	   the unrolled prologue, and %ecx/%edx point 64 past the block,
	   so 96 - 0xa0 = -64 is the offset of %xmm4's chunk; each
	   'lea 16(%ebx)' below steps to the next chunk.  */
	sub $0xa0, %ebx
	pxor %xmm0, %xmm0
	pcmpeqb %xmm4, %xmm0
	pmovmskb %xmm0, %eax
	test %eax, %eax
	jnz L(CopyFrom1To16Bytes)

	pcmpeqb %xmm5, %xmm0
	pmovmskb %xmm0, %eax
	movdqu %xmm4, -64(%edx)	/* chunk was null-free: store it */
	test %eax, %eax
	lea 16(%ebx), %ebx
	jnz L(CopyFrom1To16Bytes)

	pcmpeqb %xmm6, %xmm0
	pmovmskb %xmm0, %eax
	movdqu %xmm5, -48(%edx)
	test %eax, %eax
	lea 16(%ebx), %ebx
	jnz L(CopyFrom1To16Bytes)

	movdqu %xmm6, -32(%edx)
	pcmpeqb %xmm7, %xmm0
	pmovmskb %xmm0, %eax
	lea 16(%ebx), %ebx
	/* Null must be in %xmm7; fall through with its mask in %eax.  */
1790 | /*-----------------End of main part---------------------------*/ |
1791 | |
	.p2align 4
/* The 16-byte source chunk at %ebx(%ecx) contains the terminator; its
   pcmpeqb mask is in %eax.  Point %ecx/%edx at the chunk and dispatch
   on the lowest set mask bit to copy the final 1..16 bytes (null byte
   included).  */
L(CopyFrom1To16Bytes):
	add %ebx, %edx
	add %ebx, %ecx

	POP (%ebx)
	test %al, %al
	jz L(ExitHigh)		/* null is in bytes 8..15 */
	test $0x01, %al
	jnz L(Exit1)
	test $0x02, %al
	jnz L(Exit2)
	test $0x04, %al
	jnz L(Exit3)
	test $0x08, %al
	jnz L(Exit4)
	test $0x10, %al
	jnz L(Exit5)
	test $0x20, %al
	jnz L(Exit6)
	test $0x40, %al
	jnz L(Exit7)
	/* Exit 8 */
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 7(%edx), %eax	/* stpcpy: address of the null byte */
# else
	movl %edi, %eax		/* strcpy: original dst */
# endif
	RETURN
1825 | |
	.p2align 4
/* Null is in bytes 8..15 of the final chunk: dispatch on %ah.  */
L(ExitHigh):
	test $0x01, %ah
	jnz L(Exit9)
	test $0x02, %ah
	jnz L(Exit10)
	test $0x04, %ah
	jnz L(Exit11)
	test $0x08, %ah
	jnz L(Exit12)
	test $0x10, %ah
	jnz L(Exit13)
	test $0x20, %ah
	jnz L(Exit14)
	test $0x40, %ah
	jnz L(Exit15)
	/* Exit 16 */
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 8(%ecx), %xmm0
	movlpd %xmm0, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 15(%edx), %eax	/* stpcpy: address of the null byte */
# else
	movl %edi, %eax		/* strcpy: original dst */
# endif
	RETURN
1853 | |
	.p2align 4
/* L(ExitN): the null byte is at offset N-1 in the final chunk.  Copy
   the last N bytes (terminator included), some variants with
   overlapping 4/8-byte stores.  Return value: original dst (%edi) for
   strcpy, &dst[N-1] for stpcpy.  Exit8 and Exit16 are inlined at the
   dispatch sites above.  */
L(Exit1):
	movb (%ecx), %al
	movb %al, (%edx)
# ifdef USE_AS_STPCPY
	lea (%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit2):
	movw (%ecx), %ax
	movw %ax, (%edx)
# ifdef USE_AS_STPCPY
	lea 1(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit3):
	movw (%ecx), %ax
	movw %ax, (%edx)
	movb 2(%ecx), %al
	movb %al, 2(%edx)
# ifdef USE_AS_STPCPY
	lea 2(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit4):
	movl (%ecx), %eax
	movl %eax, (%edx)
# ifdef USE_AS_STPCPY
	lea 3(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit5):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movb 4(%ecx), %al
	movb %al, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 4(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit6):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movw 4(%ecx), %ax
	movw %ax, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 5(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit7):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 3(%ecx), %eax	/* bytes 3..6; overlaps byte 3 above */
	movl %eax, 3(%edx)
# ifdef USE_AS_STPCPY
	lea 6(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit9):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
	movb 8(%ecx), %al
	movb %al, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 8(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit10):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
	movw 8(%ecx), %ax
	movw %ax, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 9(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit11):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
	movl 7(%ecx), %eax	/* bytes 7..10; overlaps byte 7 above */
	movl %eax, 7(%edx)
# ifdef USE_AS_STPCPY
	lea 10(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit12):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
	movl 8(%ecx), %eax
	movl %eax, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 11(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit13):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 5(%ecx), %xmm0	/* overlapping 8-byte loads: 13 bytes */
	movlpd %xmm0, 5(%edx)
# ifdef USE_AS_STPCPY
	lea 12(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit14):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 6(%ecx), %xmm0
	movlpd %xmm0, 6(%edx)
# ifdef USE_AS_STPCPY
	lea 13(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit15):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 7(%ecx), %xmm0
	movlpd %xmm0, 7(%edx)
# ifdef USE_AS_STPCPY
	lea 14(%edx), %eax
# else
	movl %edi, %eax
# endif
	RETURN
2037 | |
	CFI_POP (%edi)		/* the tails below run before %edi was
				   pushed; reset the CFI state for them */

	.p2align 4
/* L(ExitTailN): exits for strings of N bytes (null at offset N-1),
   taken from the initial byte-by-byte checks before any register was
   saved.  %ecx = src, %edx = dst; RETURN1 is a plain ret.  Return
   value: dst for strcpy, &dst[N-1] for stpcpy.  */
L(ExitTail1):
	movb (%ecx), %al
	movb %al, (%edx)
	movl %edx, %eax		/* &dst[0] == dst: same result for
				   strcpy and stpcpy */
	RETURN1

	.p2align 4
L(ExitTail2):
	movw (%ecx), %ax
	movw %ax, (%edx)
# ifdef USE_AS_STPCPY
	lea 1(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail3):
	movw (%ecx), %ax
	movw %ax, (%edx)
	movb 2(%ecx), %al
	movb %al, 2(%edx)
# ifdef USE_AS_STPCPY
	lea 2(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail4):
	movl (%ecx), %eax
	movl %eax, (%edx)
# ifdef USE_AS_STPCPY
	lea 3(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail5):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movb 4(%ecx), %al
	movb %al, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 4(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail6):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movw 4(%ecx), %ax
	movw %ax, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 5(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail7):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 3(%ecx), %eax	/* bytes 3..6; overlaps byte 3 above */
	movl %eax, 3(%edx)
# ifdef USE_AS_STPCPY
	lea 6(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail8):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
# ifdef USE_AS_STPCPY
	lea 7(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail9):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
	movb 8(%ecx), %al
	movb %al, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 8(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail10):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
	movw 8(%ecx), %ax
	movw %ax, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 9(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail11):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
	movl 7(%ecx), %eax	/* bytes 7..10; overlaps byte 7 above */
	movl %eax, 7(%edx)
# ifdef USE_AS_STPCPY
	lea 10(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail12):
	movl (%ecx), %eax
	movl %eax, (%edx)
	movl 4(%ecx), %eax
	movl %eax, 4(%edx)
	movl 8(%ecx), %eax
	movl %eax, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 11(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail13):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 5(%ecx), %xmm0	/* overlapping 8-byte loads: 13 bytes */
	movlpd %xmm0, 5(%edx)
# ifdef USE_AS_STPCPY
	lea 12(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail14):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 6(%ecx), %xmm0
	movlpd %xmm0, 6(%edx)
# ifdef USE_AS_STPCPY
	lea 13(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail15):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 7(%ecx), %xmm0
	movlpd %xmm0, 7(%edx)
# ifdef USE_AS_STPCPY
	lea 14(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

	.p2align 4
L(ExitTail16):
	movlpd (%ecx), %xmm0
	movlpd %xmm0, (%edx)
	movlpd 8(%ecx), %xmm0
	movlpd %xmm0, 8(%edx)
# ifdef USE_AS_STPCPY
	lea 15(%edx), %eax
# else
	movl %edx, %eax
# endif
	RETURN1

END (STRCPY)
2247 | # endif |
2248 | |
2249 | #endif |
2250 | |