1 | /* memcmp with SSSE3, wmemcmp with SSSE3 |
2 | Copyright (C) 2010-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #if IS_IN (libc) |
20 | |
21 | # include <sysdep.h> |
22 | |
23 | # ifndef MEMCMP |
24 | # define MEMCMP __memcmp_ssse3 |
25 | # endif |
26 | |
27 | # define CFI_PUSH(REG) \ |
28 | cfi_adjust_cfa_offset (4); \ |
29 | cfi_rel_offset (REG, 0) |
30 | /* CFI_PUSH (above) is the unwind annotation that goes with a 4-byte push of REG: CFA offset +4 and REG recorded at relative offset 0.  CFI_POP (below) is the matching annotation for a pop: CFA offset -4 and REG marked restored.  Both expand only to cfi_* macros, not to instructions. */ |
31 | # define CFI_POP(REG) \ |
32 | cfi_adjust_cfa_offset (-4); \ |
33 | cfi_restore (REG) |
34 | /* PUSH/POP: the real pushl/popl followed by its CFI annotation. */ |
35 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) |
36 | # define POP(REG) popl REG; CFI_POP (REG) |
37 | /* Offsets of the three stack arguments from %esp as used at function entry, before any PUSH. */ |
38 | # define PARMS 4 /* offset of the first argument */ |
39 | # define BLK1 PARMS /* first buffer pointer */ |
40 | # define BLK2 BLK1+4 /* second buffer pointer */ |
41 | # define LEN BLK2+4 /* length argument */ |
42 | # define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret /* undo the three PUSHes done in L(48bytesormore), in reverse order, and return */ |
43 | # define RETURN RETURN_END; cfi_restore_state; cfi_remember_state /* return, then re-establish the remembered CFI state for the code that follows the ret */ |
44 | |
45 | /* Warning! |
46 | wmemcmp has to use SIGNED comparison for elements. |
47 | memcmp has to use UNSIGNED comparison for elements. |
48 | */ |
49 | |
50 | atom_text_section |
51 | ENTRY (MEMCMP) /* Entry: arguments are read from BLK1/BLK2/LEN(%esp); the visible return paths leave the result in %eax. */ |
52 | movl LEN(%esp), %ecx /* ecx = length argument */ |
53 | |
54 | # ifdef USE_AS_WMEMCMP |
55 | shl $2, %ecx /* wmemcmp build: length is multiplied by 4 (presumably element count to byte count) */ |
56 | test %ecx, %ecx |
57 | jz L(zero) /* nothing to compare: return 0 */ |
58 | # endif |
59 | |
60 | movl BLK1(%esp), %eax /* eax = first buffer */ |
61 | cmp $48, %ecx |
62 | movl BLK2(%esp), %edx /* edx = second buffer; mov leaves the cmp flags intact */ |
63 | jae L(48bytesormore) /* unsigned length >= 48: SSE path */ |
64 | |
65 | # ifndef USE_AS_WMEMCMP |
66 | cmp $1, %ecx |
67 | jbe L(less1bytes) /* length 0 or 1 */ |
68 | # endif |
69 | /* Remaining lengths below 48: save ebx, advance eax and edx by the length so they point just past each buffer, and continue at L(less48bytes), which is defined outside this excerpt. */ |
70 | PUSH (%ebx) |
71 | add %ecx, %edx |
72 | add %ecx, %eax |
73 | jmp L(less48bytes) |
74 | |
75 | CFI_POP (%ebx) /* annotation only, no instruction: the labels below are reached, within this excerpt, from jumps taken before any PUSH */ |
76 | |
77 | # ifndef USE_AS_WMEMCMP |
78 | .p2align 4 |
79 | L(less1bytes): |
80 | jb L(zero) /* reached from the jbe above, so the flags are those of cmp $1, %ecx: below 1 means length 0 */ |
81 | movb (%eax), %cl |
82 | cmp (%edx), %cl /* compare the single byte of each buffer */ |
83 | je L(zero) |
84 | mov $1, %eax /* mov does not change the flags tested by ja */ |
85 | ja L(1bytesend) /* first buffer's byte is larger (unsigned): return 1 */ |
86 | neg %eax /* otherwise return -1 */ |
87 | L(1bytesend): |
88 | ret |
89 | # endif |
90 | |
91 | .p2align 4 |
92 | L(zero): |
93 | xor %eax, %eax /* return 0 */ |
94 | ret |
95 | |
96 | .p2align 4 |
97 | L(48bytesormore): /* Length >= 48: compare the first 16 bytes unaligned, then align the first-buffer cursor and dispatch on the second buffer's misalignment. */ |
98 | PUSH (%ebx) |
99 | PUSH (%esi) |
100 | PUSH (%edi) |
101 | cfi_remember_state /* saved so the cfi_restore_state uses further down return to this three-register layout */ |
102 | movdqu (%eax), %xmm3 /* unaligned loads of the first 16 bytes of each buffer */ |
103 | movdqu (%edx), %xmm0 |
104 | movl %eax, %edi /* edi = first-buffer cursor */ |
105 | movl %edx, %esi /* esi = second-buffer cursor */ |
106 | pcmpeqb %xmm0, %xmm3 /* 0xff in every byte lane that matches */ |
107 | pmovmskb %xmm3, %edx /* edx = 16-bit mask, one bit per lane */ |
108 | lea 16(%edi), %edi |
109 | |
110 | sub $0xffff, %edx /* zero only when all 16 bytes were equal */ |
111 | lea 16(%esi), %esi /* lea leaves the flags from sub untouched */ |
112 | jnz L(less16bytes) /* mismatch in the first 16 bytes; target is outside this excerpt */ |
113 | mov %edi, %edx |
114 | and $0xf, %edx /* edx = edi mod 16 */ |
115 | xor %edx, %edi /* round edi down to a 16-byte boundary */ |
116 | sub %edx, %esi /* move esi back by the same amount */ |
117 | add %edx, %ecx /* and add those bytes back into the length */ |
118 | mov %esi, %edx |
119 | and $0xf, %edx /* edx = esi mod 16, the byte shift between the two cursors */ |
120 | jz L(shr_0) /* both cursors are 16-byte aligned */ |
121 | xor %edx, %esi /* round esi down as well; edx keeps the shift and selects L(shr_N) */ |
122 | /* Dispatch on the shift in edx.  The memcmp build handles every shift 1..15; the wmemcmp build tests only 4 and 8 and sends everything else to L(shr_12) (assumes the shift is a multiple of 4 there - TODO confirm). */ |
123 | # ifndef USE_AS_WMEMCMP |
124 | cmp $8, %edx |
125 | jae L(next_unaligned_table) |
126 | cmp $0, %edx /* NOTE(review): edx is nonzero on this path (jz L(shr_0) above), so the following je is never taken */ |
127 | je L(shr_0) |
128 | cmp $1, %edx |
129 | je L(shr_1) |
130 | cmp $2, %edx |
131 | je L(shr_2) |
132 | cmp $3, %edx |
133 | je L(shr_3) |
134 | cmp $4, %edx |
135 | je L(shr_4) |
136 | cmp $5, %edx |
137 | je L(shr_5) |
138 | cmp $6, %edx |
139 | je L(shr_6) |
140 | jmp L(shr_7) /* only 7 is left below 8 */ |
141 | /* Shifts 8..15. */ |
142 | .p2align 2 |
143 | L(next_unaligned_table): |
144 | cmp $8, %edx |
145 | je L(shr_8) |
146 | cmp $9, %edx |
147 | je L(shr_9) |
148 | cmp $10, %edx |
149 | je L(shr_10) |
150 | cmp $11, %edx |
151 | je L(shr_11) |
152 | cmp $12, %edx |
153 | je L(shr_12) |
154 | cmp $13, %edx |
155 | je L(shr_13) |
156 | cmp $14, %edx |
157 | je L(shr_14) |
158 | jmp L(shr_15) /* only 15 is left; L(shr_15) is outside this excerpt */ |
159 | # else |
160 | cmp $0, %edx /* NOTE(review): edx is nonzero on this path (jz L(shr_0) above), so the following je is never taken */ |
161 | je L(shr_0) |
162 | cmp $4, %edx |
163 | je L(shr_4) |
164 | cmp $8, %edx |
165 | je L(shr_8) |
166 | jmp L(shr_12) |
167 | # endif |
168 | |
169 | .p2align 4 |
170 | L(shr_0): /* Shift 0: edi and esi are both 16-byte aligned, so the buffers are compared with aligned accesses directly. */ |
171 | cmp $80, %ecx |
172 | jae L(shr_0_gobble) /* ecx >= 80 (unsigned): use the loop version */ |
173 | lea -48(%ecx), %ecx /* ecx -= 48; the result is the byte count added to the cursors in the tail below */ |
174 | xor %eax, %eax /* eax = 0, where the L(shr_N) blocks store their shift (presumably read by L(exit) - TODO confirm) */ |
175 | movaps (%esi), %xmm1 |
176 | pcmpeqb (%edi), %xmm1 /* xmm1 = match lanes for bytes 0..15 */ |
177 | movaps 16(%esi), %xmm2 |
178 | pcmpeqb 16(%edi), %xmm2 /* xmm2 = match lanes for bytes 16..31 */ |
179 | pand %xmm1, %xmm2 /* a lane stays 0xff only if it matched in both halves */ |
180 | pmovmskb %xmm2, %edx |
181 | add $32, %edi |
182 | add $32, %esi |
183 | sub $0xffff, %edx /* zero only when all 32 bytes were equal */ |
184 | jnz L(exit) /* mismatch; L(exit) is outside this excerpt, xmm1 still holds the first-half result */ |
185 | /* All 32 bytes equal: set eax/edx to cursor + ecx and hand the rest to L(less48bytes) (outside this excerpt). */ |
186 | lea (%ecx, %edi,1), %eax |
187 | lea (%ecx, %esi,1), %edx |
188 | POP (%edi) |
189 | POP (%esi) /* ebx stays pushed, as on the short-length entry path that jumps to L(less48bytes) */ |
190 | jmp L(less48bytes) |
191 | |
192 | cfi_restore_state |
193 | cfi_remember_state |
194 | .p2align 4 |
195 | L(shr_0_gobble): /* Loop version of L(shr_0), entered with ecx >= 80: 32 bytes per iteration, with the compare of the next 32 bytes started one iteration ahead. */ |
196 | lea -48(%ecx), %ecx |
197 | movdqa (%esi), %xmm0 |
198 | xor %eax, %eax /* eax = 0, where the L(shr_N) blocks store their shift (presumably read by L(exit) - TODO confirm) */ |
199 | pcmpeqb (%edi), %xmm0 /* xmm0 = match lanes for bytes 0..15 */ |
200 | sub $32, %ecx |
201 | movdqa 16(%esi), %xmm2 |
202 | pcmpeqb 16(%edi), %xmm2 /* xmm2 = match lanes for bytes 16..31 */ |
203 | L(shr_0_gobble_loop): |
204 | pand %xmm0, %xmm2 /* merge the two 16-byte results of the current block */ |
205 | sub $32, %ecx /* sets CF when the count drops below zero */ |
206 | pmovmskb %xmm2, %edx |
207 | movdqa %xmm0, %xmm1 /* keep the first-half result in xmm1 (presumably for L(exit) - TODO confirm) */ |
208 | movdqa 32(%esi), %xmm0 /* start on the next 32 bytes */ |
209 | movdqa 48(%esi), %xmm2 |
210 | sbb $0xffff, %edx /* edx = mask - 0xffff - CF: zero only if all 32 bytes matched and the sub above did not borrow */ |
211 | pcmpeqb 32(%edi), %xmm0 |
212 | pcmpeqb 48(%edi), %xmm2 |
213 | lea 32(%edi), %edi |
214 | lea 32(%esi), %esi |
215 | jz L(shr_0_gobble_loop) /* tests the sbb result; the SSE instructions and lea in between do not change the flags */ |
216 | /* Loop left: either the current block mismatched or the count ran out. */ |
217 | pand %xmm0, %xmm2 /* merge the results of the block that was compared ahead */ |
218 | cmp $0, %ecx |
219 | jge L(shr_0_gobble_loop_next) /* count still non-negative: edx is the plain mask difference */ |
220 | inc %edx /* count went negative: take the borrow back out of edx */ |
221 | add $32, %ecx /* and undo the last subtraction */ |
222 | L(shr_0_gobble_loop_next): |
223 | test %edx, %edx |
224 | jnz L(exit) /* mismatch in the current block; L(exit) is outside this excerpt */ |
225 | /* No mismatch so far: check the 32 bytes whose compare was already done ahead. */ |
226 | pmovmskb %xmm2, %edx |
227 | movdqa %xmm0, %xmm1 |
228 | lea 32(%edi), %edi |
229 | lea 32(%esi), %esi |
230 | sub $0xffff, %edx /* zero only when all 32 bytes were equal */ |
231 | jnz L(exit) |
232 | lea (%ecx, %edi,1), %eax /* eax/edx = cursor + ecx for the tail code */ |
233 | lea (%ecx, %esi,1), %edx |
234 | POP (%edi) |
235 | POP (%esi) /* ebx stays pushed, as on the short-length entry path that jumps to L(less48bytes) */ |
236 | jmp L(less48bytes) |
237 | |
238 | # ifndef USE_AS_WMEMCMP |
239 | cfi_restore_state |
240 | cfi_remember_state |
241 | .p2align 4 |
242 | L(shr_1): /* Shift 1: the second buffer's data starts 1 byte past the aligned esi, so each 16-byte group is rebuilt with palignr from two aligned loads. */ |
243 | cmp $80, %ecx |
244 | lea -48(%ecx), %ecx /* ecx -= 48 without touching the cmp flags */ |
245 | mov %edx, %eax /* eax = shift held in edx by the dispatch code (presumably read by L(exit) - TODO confirm) */ |
246 | jae L(shr_1_gobble) /* tests cmp $80 above: lea and mov do not change the flags */ |
247 | /* Group 1: bytes esi+1 .. esi+16, compared with 16 bytes at edi. */ |
248 | movdqa 16(%esi), %xmm1 |
249 | movdqa %xmm1, %xmm2 /* keep a copy of 16(%esi) for the second group */ |
250 | palignr $1,(%esi), %xmm1 |
251 | pcmpeqb (%edi), %xmm1 |
252 | /* Group 2: bytes esi+17 .. esi+32, compared with 16 bytes at edi+16. */ |
253 | movdqa 32(%esi), %xmm3 |
254 | palignr $1,%xmm2, %xmm3 |
255 | pcmpeqb 16(%edi), %xmm3 |
256 | /* Merge both results; any cleared mask bit means a mismatch somewhere in these 32 bytes. */ |
257 | pand %xmm1, %xmm3 |
258 | pmovmskb %xmm3, %edx |
259 | lea 32(%edi), %edi |
260 | lea 32(%esi), %esi |
261 | sub $0xffff, %edx /* zero only when all 32 bytes were equal */ |
262 | jnz L(exit) /* mismatch; L(exit) is outside this excerpt, xmm1 still holds the group 1 result */ |
263 | lea (%ecx, %edi,1), %eax /* eax = edi + ecx for the tail code */ |
264 | lea 1(%ecx, %esi,1), %edx /* edx = esi + ecx + 1: the shift is added back to the second-buffer pointer */ |
265 | POP (%edi) |
266 | POP (%esi) /* ebx stays pushed, as on the short-length entry path that jumps to L(less48bytes) */ |
267 | jmp L(less48bytes) |
268 | |
269 | cfi_restore_state |
270 | cfi_remember_state |
271 | .p2align 4 |
272 | L(shr_1_gobble): /* Loop version of L(shr_1), entered with the original ecx >= 80 (ecx already reduced by 48, eax = shift): 32 bytes per iteration, next block compared one iteration ahead. */ |
273 | sub $32, %ecx |
274 | movdqa 16(%esi), %xmm0 |
275 | palignr $1,(%esi), %xmm0 /* xmm0 = bytes esi+1 .. esi+16 */ |
276 | pcmpeqb (%edi), %xmm0 |
277 | |
278 | movdqa 32(%esi), %xmm3 |
279 | palignr $1,16(%esi), %xmm3 /* xmm3 = bytes esi+17 .. esi+32 */ |
280 | pcmpeqb 16(%edi), %xmm3 |
281 | |
282 | L(shr_1_gobble_loop): |
283 | pand %xmm0, %xmm3 /* merge the two 16-byte results of the current block */ |
284 | sub $32, %ecx /* sets CF when the count drops below zero */ |
285 | pmovmskb %xmm3, %edx |
286 | movdqa %xmm0, %xmm1 /* keep the first-half result in xmm1 (presumably for L(exit) - TODO confirm) */ |
287 | /* Start on the next 32 bytes while the flags from sub/sbb stay live. */ |
288 | movdqa 64(%esi), %xmm3 |
289 | palignr $1,48(%esi), %xmm3 |
290 | sbb $0xffff, %edx /* edx = mask - 0xffff - CF: zero only if all 32 bytes matched and the sub above did not borrow */ |
291 | movdqa 48(%esi), %xmm0 |
292 | palignr $1,32(%esi), %xmm0 |
293 | pcmpeqb 32(%edi), %xmm0 |
294 | lea 32(%esi), %esi |
295 | pcmpeqb 48(%edi), %xmm3 /* edi has not been advanced yet, hence offset 48 */ |
296 | |
297 | lea 32(%edi), %edi |
298 | jz L(shr_1_gobble_loop) /* tests the sbb result; the SSE instructions and lea in between do not change the flags */ |
299 | pand %xmm0, %xmm3 /* loop left: merge the results of the block that was compared ahead */ |
300 | |
301 | cmp $0, %ecx |
302 | jge L(shr_1_gobble_next) /* count still non-negative: edx is the plain mask difference */ |
303 | inc %edx /* count went negative: take the borrow back out of edx */ |
304 | add $32, %ecx /* and undo the last subtraction */ |
305 | L(shr_1_gobble_next): |
306 | test %edx, %edx |
307 | jnz L(exit) /* mismatch in the current block; L(exit) is outside this excerpt */ |
308 | /* No mismatch so far: check the 32 bytes whose compare was already done ahead. */ |
309 | pmovmskb %xmm3, %edx |
310 | movdqa %xmm0, %xmm1 |
311 | lea 32(%edi), %edi |
312 | lea 32(%esi), %esi |
313 | sub $0xffff, %edx /* zero only when all 32 bytes were equal */ |
314 | jnz L(exit) |
315 | |
316 | lea (%ecx, %edi,1), %eax /* eax = edi + ecx for the tail code */ |
317 | lea 1(%ecx, %esi,1), %edx /* edx = esi + ecx + 1: the shift is added back to the second-buffer pointer */ |
318 | POP (%edi) |
319 | POP (%esi) /* ebx stays pushed, as on the short-length entry path that jumps to L(less48bytes) */ |
320 | jmp L(less48bytes) |
321 | |
322 | |
323 | cfi_restore_state |
324 | cfi_remember_state |
325 | .p2align 4 |
326 | L(shr_2): |
327 | cmp $80, %ecx |
328 | lea -48(%ecx), %ecx |
329 | mov %edx, %eax |
330 | jae L(shr_2_gobble) |
331 | |
332 | movdqa 16(%esi), %xmm1 |
333 | movdqa %xmm1, %xmm2 |
334 | palignr $2,(%esi), %xmm1 |
335 | pcmpeqb (%edi), %xmm1 |
336 | |
337 | movdqa 32(%esi), %xmm3 |
338 | palignr $2,%xmm2, %xmm3 |
339 | pcmpeqb 16(%edi), %xmm3 |
340 | |
341 | pand %xmm1, %xmm3 |
342 | pmovmskb %xmm3, %edx |
343 | lea 32(%edi), %edi |
344 | lea 32(%esi), %esi |
345 | sub $0xffff, %edx |
346 | jnz L(exit) |
347 | lea (%ecx, %edi,1), %eax |
348 | lea 2(%ecx, %esi,1), %edx |
349 | POP (%edi) |
350 | POP (%esi) |
351 | jmp L(less48bytes) |
352 | |
353 | cfi_restore_state |
354 | cfi_remember_state |
355 | .p2align 4 |
356 | L(shr_2_gobble): |
357 | sub $32, %ecx |
358 | movdqa 16(%esi), %xmm0 |
359 | palignr $2,(%esi), %xmm0 |
360 | pcmpeqb (%edi), %xmm0 |
361 | |
362 | movdqa 32(%esi), %xmm3 |
363 | palignr $2,16(%esi), %xmm3 |
364 | pcmpeqb 16(%edi), %xmm3 |
365 | |
366 | L(shr_2_gobble_loop): |
367 | pand %xmm0, %xmm3 |
368 | sub $32, %ecx |
369 | pmovmskb %xmm3, %edx |
370 | movdqa %xmm0, %xmm1 |
371 | |
372 | movdqa 64(%esi), %xmm3 |
373 | palignr $2,48(%esi), %xmm3 |
374 | sbb $0xffff, %edx |
375 | movdqa 48(%esi), %xmm0 |
376 | palignr $2,32(%esi), %xmm0 |
377 | pcmpeqb 32(%edi), %xmm0 |
378 | lea 32(%esi), %esi |
379 | pcmpeqb 48(%edi), %xmm3 |
380 | |
381 | lea 32(%edi), %edi |
382 | jz L(shr_2_gobble_loop) |
383 | pand %xmm0, %xmm3 |
384 | |
385 | cmp $0, %ecx |
386 | jge L(shr_2_gobble_next) |
387 | inc %edx |
388 | add $32, %ecx |
389 | L(shr_2_gobble_next): |
390 | test %edx, %edx |
391 | jnz L(exit) |
392 | |
393 | pmovmskb %xmm3, %edx |
394 | movdqa %xmm0, %xmm1 |
395 | lea 32(%edi), %edi |
396 | lea 32(%esi), %esi |
397 | sub $0xffff, %edx |
398 | jnz L(exit) |
399 | |
400 | lea (%ecx, %edi,1), %eax |
401 | lea 2(%ecx, %esi,1), %edx |
402 | POP (%edi) |
403 | POP (%esi) |
404 | jmp L(less48bytes) |
405 | |
406 | cfi_restore_state |
407 | cfi_remember_state |
408 | .p2align 4 |
409 | L(shr_3): |
410 | cmp $80, %ecx |
411 | lea -48(%ecx), %ecx |
412 | mov %edx, %eax |
413 | jae L(shr_3_gobble) |
414 | |
415 | movdqa 16(%esi), %xmm1 |
416 | movdqa %xmm1, %xmm2 |
417 | palignr $3,(%esi), %xmm1 |
418 | pcmpeqb (%edi), %xmm1 |
419 | |
420 | movdqa 32(%esi), %xmm3 |
421 | palignr $3,%xmm2, %xmm3 |
422 | pcmpeqb 16(%edi), %xmm3 |
423 | |
424 | pand %xmm1, %xmm3 |
425 | pmovmskb %xmm3, %edx |
426 | lea 32(%edi), %edi |
427 | lea 32(%esi), %esi |
428 | sub $0xffff, %edx |
429 | jnz L(exit) |
430 | lea (%ecx, %edi,1), %eax |
431 | lea 3(%ecx, %esi,1), %edx |
432 | POP (%edi) |
433 | POP (%esi) |
434 | jmp L(less48bytes) |
435 | |
436 | cfi_restore_state |
437 | cfi_remember_state |
438 | .p2align 4 |
439 | L(shr_3_gobble): |
440 | sub $32, %ecx |
441 | movdqa 16(%esi), %xmm0 |
442 | palignr $3,(%esi), %xmm0 |
443 | pcmpeqb (%edi), %xmm0 |
444 | |
445 | movdqa 32(%esi), %xmm3 |
446 | palignr $3,16(%esi), %xmm3 |
447 | pcmpeqb 16(%edi), %xmm3 |
448 | |
449 | L(shr_3_gobble_loop): |
450 | pand %xmm0, %xmm3 |
451 | sub $32, %ecx |
452 | pmovmskb %xmm3, %edx |
453 | movdqa %xmm0, %xmm1 |
454 | |
455 | movdqa 64(%esi), %xmm3 |
456 | palignr $3,48(%esi), %xmm3 |
457 | sbb $0xffff, %edx |
458 | movdqa 48(%esi), %xmm0 |
459 | palignr $3,32(%esi), %xmm0 |
460 | pcmpeqb 32(%edi), %xmm0 |
461 | lea 32(%esi), %esi |
462 | pcmpeqb 48(%edi), %xmm3 |
463 | |
464 | lea 32(%edi), %edi |
465 | jz L(shr_3_gobble_loop) |
466 | pand %xmm0, %xmm3 |
467 | |
468 | cmp $0, %ecx |
469 | jge L(shr_3_gobble_next) |
470 | inc %edx |
471 | add $32, %ecx |
472 | L(shr_3_gobble_next): |
473 | test %edx, %edx |
474 | jnz L(exit) |
475 | |
476 | pmovmskb %xmm3, %edx |
477 | movdqa %xmm0, %xmm1 |
478 | lea 32(%edi), %edi |
479 | lea 32(%esi), %esi |
480 | sub $0xffff, %edx |
481 | jnz L(exit) |
482 | |
483 | lea (%ecx, %edi,1), %eax |
484 | lea 3(%ecx, %esi,1), %edx |
485 | POP (%edi) |
486 | POP (%esi) |
487 | jmp L(less48bytes) |
488 | # endif |
489 | |
490 | cfi_restore_state |
491 | cfi_remember_state |
492 | .p2align 4 |
493 | L(shr_4): |
494 | cmp $80, %ecx |
495 | lea -48(%ecx), %ecx |
496 | mov %edx, %eax |
497 | jae L(shr_4_gobble) |
498 | |
499 | movdqa 16(%esi), %xmm1 |
500 | movdqa %xmm1, %xmm2 |
501 | palignr $4,(%esi), %xmm1 |
502 | pcmpeqb (%edi), %xmm1 |
503 | |
504 | movdqa 32(%esi), %xmm3 |
505 | palignr $4,%xmm2, %xmm3 |
506 | pcmpeqb 16(%edi), %xmm3 |
507 | |
508 | pand %xmm1, %xmm3 |
509 | pmovmskb %xmm3, %edx |
510 | lea 32(%edi), %edi |
511 | lea 32(%esi), %esi |
512 | sub $0xffff, %edx |
513 | jnz L(exit) |
514 | lea (%ecx, %edi,1), %eax |
515 | lea 4(%ecx, %esi,1), %edx |
516 | POP (%edi) |
517 | POP (%esi) |
518 | jmp L(less48bytes) |
519 | |
520 | cfi_restore_state |
521 | cfi_remember_state |
522 | .p2align 4 |
523 | L(shr_4_gobble): |
524 | sub $32, %ecx |
525 | movdqa 16(%esi), %xmm0 |
526 | palignr $4,(%esi), %xmm0 |
527 | pcmpeqb (%edi), %xmm0 |
528 | |
529 | movdqa 32(%esi), %xmm3 |
530 | palignr $4,16(%esi), %xmm3 |
531 | pcmpeqb 16(%edi), %xmm3 |
532 | |
533 | L(shr_4_gobble_loop): |
534 | pand %xmm0, %xmm3 |
535 | sub $32, %ecx |
536 | pmovmskb %xmm3, %edx |
537 | movdqa %xmm0, %xmm1 |
538 | |
539 | movdqa 64(%esi), %xmm3 |
540 | palignr $4,48(%esi), %xmm3 |
541 | sbb $0xffff, %edx |
542 | movdqa 48(%esi), %xmm0 |
543 | palignr $4,32(%esi), %xmm0 |
544 | pcmpeqb 32(%edi), %xmm0 |
545 | lea 32(%esi), %esi |
546 | pcmpeqb 48(%edi), %xmm3 |
547 | |
548 | lea 32(%edi), %edi |
549 | jz L(shr_4_gobble_loop) |
550 | pand %xmm0, %xmm3 |
551 | |
552 | cmp $0, %ecx |
553 | jge L(shr_4_gobble_next) |
554 | inc %edx |
555 | add $32, %ecx |
556 | L(shr_4_gobble_next): |
557 | test %edx, %edx |
558 | jnz L(exit) |
559 | |
560 | pmovmskb %xmm3, %edx |
561 | movdqa %xmm0, %xmm1 |
562 | lea 32(%edi), %edi |
563 | lea 32(%esi), %esi |
564 | sub $0xffff, %edx |
565 | jnz L(exit) |
566 | |
567 | lea (%ecx, %edi,1), %eax |
568 | lea 4(%ecx, %esi,1), %edx |
569 | POP (%edi) |
570 | POP (%esi) |
571 | jmp L(less48bytes) |
572 | |
573 | # ifndef USE_AS_WMEMCMP |
574 | cfi_restore_state |
575 | cfi_remember_state |
576 | .p2align 4 |
577 | L(shr_5): |
578 | cmp $80, %ecx |
579 | lea -48(%ecx), %ecx |
580 | mov %edx, %eax |
581 | jae L(shr_5_gobble) |
582 | |
583 | movdqa 16(%esi), %xmm1 |
584 | movdqa %xmm1, %xmm2 |
585 | palignr $5,(%esi), %xmm1 |
586 | pcmpeqb (%edi), %xmm1 |
587 | |
588 | movdqa 32(%esi), %xmm3 |
589 | palignr $5,%xmm2, %xmm3 |
590 | pcmpeqb 16(%edi), %xmm3 |
591 | |
592 | pand %xmm1, %xmm3 |
593 | pmovmskb %xmm3, %edx |
594 | lea 32(%edi), %edi |
595 | lea 32(%esi), %esi |
596 | sub $0xffff, %edx |
597 | jnz L(exit) |
598 | lea (%ecx, %edi,1), %eax |
599 | lea 5(%ecx, %esi,1), %edx |
600 | POP (%edi) |
601 | POP (%esi) |
602 | jmp L(less48bytes) |
603 | |
604 | cfi_restore_state |
605 | cfi_remember_state |
606 | .p2align 4 |
607 | L(shr_5_gobble): |
608 | sub $32, %ecx |
609 | movdqa 16(%esi), %xmm0 |
610 | palignr $5,(%esi), %xmm0 |
611 | pcmpeqb (%edi), %xmm0 |
612 | |
613 | movdqa 32(%esi), %xmm3 |
614 | palignr $5,16(%esi), %xmm3 |
615 | pcmpeqb 16(%edi), %xmm3 |
616 | |
617 | L(shr_5_gobble_loop): |
618 | pand %xmm0, %xmm3 |
619 | sub $32, %ecx |
620 | pmovmskb %xmm3, %edx |
621 | movdqa %xmm0, %xmm1 |
622 | |
623 | movdqa 64(%esi), %xmm3 |
624 | palignr $5,48(%esi), %xmm3 |
625 | sbb $0xffff, %edx |
626 | movdqa 48(%esi), %xmm0 |
627 | palignr $5,32(%esi), %xmm0 |
628 | pcmpeqb 32(%edi), %xmm0 |
629 | lea 32(%esi), %esi |
630 | pcmpeqb 48(%edi), %xmm3 |
631 | |
632 | lea 32(%edi), %edi |
633 | jz L(shr_5_gobble_loop) |
634 | pand %xmm0, %xmm3 |
635 | |
636 | cmp $0, %ecx |
637 | jge L(shr_5_gobble_next) |
638 | inc %edx |
639 | add $32, %ecx |
640 | L(shr_5_gobble_next): |
641 | test %edx, %edx |
642 | jnz L(exit) |
643 | |
644 | pmovmskb %xmm3, %edx |
645 | movdqa %xmm0, %xmm1 |
646 | lea 32(%edi), %edi |
647 | lea 32(%esi), %esi |
648 | sub $0xffff, %edx |
649 | jnz L(exit) |
650 | |
651 | lea (%ecx, %edi,1), %eax |
652 | lea 5(%ecx, %esi,1), %edx |
653 | POP (%edi) |
654 | POP (%esi) |
655 | jmp L(less48bytes) |
656 | |
657 | cfi_restore_state |
658 | cfi_remember_state |
659 | .p2align 4 |
660 | L(shr_6): |
661 | cmp $80, %ecx |
662 | lea -48(%ecx), %ecx |
663 | mov %edx, %eax |
664 | jae L(shr_6_gobble) |
665 | |
666 | movdqa 16(%esi), %xmm1 |
667 | movdqa %xmm1, %xmm2 |
668 | palignr $6,(%esi), %xmm1 |
669 | pcmpeqb (%edi), %xmm1 |
670 | |
671 | movdqa 32(%esi), %xmm3 |
672 | palignr $6,%xmm2, %xmm3 |
673 | pcmpeqb 16(%edi), %xmm3 |
674 | |
675 | pand %xmm1, %xmm3 |
676 | pmovmskb %xmm3, %edx |
677 | lea 32(%edi), %edi |
678 | lea 32(%esi), %esi |
679 | sub $0xffff, %edx |
680 | jnz L(exit) |
681 | lea (%ecx, %edi,1), %eax |
682 | lea 6(%ecx, %esi,1), %edx |
683 | POP (%edi) |
684 | POP (%esi) |
685 | jmp L(less48bytes) |
686 | |
687 | cfi_restore_state |
688 | cfi_remember_state |
689 | .p2align 4 |
690 | L(shr_6_gobble): |
691 | sub $32, %ecx |
692 | movdqa 16(%esi), %xmm0 |
693 | palignr $6,(%esi), %xmm0 |
694 | pcmpeqb (%edi), %xmm0 |
695 | |
696 | movdqa 32(%esi), %xmm3 |
697 | palignr $6,16(%esi), %xmm3 |
698 | pcmpeqb 16(%edi), %xmm3 |
699 | |
700 | L(shr_6_gobble_loop): |
701 | pand %xmm0, %xmm3 |
702 | sub $32, %ecx |
703 | pmovmskb %xmm3, %edx |
704 | movdqa %xmm0, %xmm1 |
705 | |
706 | movdqa 64(%esi), %xmm3 |
707 | palignr $6,48(%esi), %xmm3 |
708 | sbb $0xffff, %edx |
709 | movdqa 48(%esi), %xmm0 |
710 | palignr $6,32(%esi), %xmm0 |
711 | pcmpeqb 32(%edi), %xmm0 |
712 | lea 32(%esi), %esi |
713 | pcmpeqb 48(%edi), %xmm3 |
714 | |
715 | lea 32(%edi), %edi |
716 | jz L(shr_6_gobble_loop) |
717 | pand %xmm0, %xmm3 |
718 | |
719 | cmp $0, %ecx |
720 | jge L(shr_6_gobble_next) |
721 | inc %edx |
722 | add $32, %ecx |
723 | L(shr_6_gobble_next): |
724 | test %edx, %edx |
725 | jnz L(exit) |
726 | |
727 | pmovmskb %xmm3, %edx |
728 | movdqa %xmm0, %xmm1 |
729 | lea 32(%edi), %edi |
730 | lea 32(%esi), %esi |
731 | sub $0xffff, %edx |
732 | jnz L(exit) |
733 | |
734 | lea (%ecx, %edi,1), %eax |
735 | lea 6(%ecx, %esi,1), %edx |
736 | POP (%edi) |
737 | POP (%esi) |
738 | jmp L(less48bytes) |
739 | |
740 | cfi_restore_state |
741 | cfi_remember_state |
742 | .p2align 4 |
743 | L(shr_7): |
744 | cmp $80, %ecx |
745 | lea -48(%ecx), %ecx |
746 | mov %edx, %eax |
747 | jae L(shr_7_gobble) |
748 | |
749 | movdqa 16(%esi), %xmm1 |
750 | movdqa %xmm1, %xmm2 |
751 | palignr $7,(%esi), %xmm1 |
752 | pcmpeqb (%edi), %xmm1 |
753 | |
754 | movdqa 32(%esi), %xmm3 |
755 | palignr $7,%xmm2, %xmm3 |
756 | pcmpeqb 16(%edi), %xmm3 |
757 | |
758 | pand %xmm1, %xmm3 |
759 | pmovmskb %xmm3, %edx |
760 | lea 32(%edi), %edi |
761 | lea 32(%esi), %esi |
762 | sub $0xffff, %edx |
763 | jnz L(exit) |
764 | lea (%ecx, %edi,1), %eax |
765 | lea 7(%ecx, %esi,1), %edx |
766 | POP (%edi) |
767 | POP (%esi) |
768 | jmp L(less48bytes) |
769 | |
770 | cfi_restore_state |
771 | cfi_remember_state |
772 | .p2align 4 |
773 | L(shr_7_gobble): |
774 | sub $32, %ecx |
775 | movdqa 16(%esi), %xmm0 |
776 | palignr $7,(%esi), %xmm0 |
777 | pcmpeqb (%edi), %xmm0 |
778 | |
779 | movdqa 32(%esi), %xmm3 |
780 | palignr $7,16(%esi), %xmm3 |
781 | pcmpeqb 16(%edi), %xmm3 |
782 | |
783 | L(shr_7_gobble_loop): |
784 | pand %xmm0, %xmm3 |
785 | sub $32, %ecx |
786 | pmovmskb %xmm3, %edx |
787 | movdqa %xmm0, %xmm1 |
788 | |
789 | movdqa 64(%esi), %xmm3 |
790 | palignr $7,48(%esi), %xmm3 |
791 | sbb $0xffff, %edx |
792 | movdqa 48(%esi), %xmm0 |
793 | palignr $7,32(%esi), %xmm0 |
794 | pcmpeqb 32(%edi), %xmm0 |
795 | lea 32(%esi), %esi |
796 | pcmpeqb 48(%edi), %xmm3 |
797 | |
798 | lea 32(%edi), %edi |
799 | jz L(shr_7_gobble_loop) |
800 | pand %xmm0, %xmm3 |
801 | |
802 | cmp $0, %ecx |
803 | jge L(shr_7_gobble_next) |
804 | inc %edx |
805 | add $32, %ecx |
806 | L(shr_7_gobble_next): |
807 | test %edx, %edx |
808 | jnz L(exit) |
809 | |
810 | pmovmskb %xmm3, %edx |
811 | movdqa %xmm0, %xmm1 |
812 | lea 32(%edi), %edi |
813 | lea 32(%esi), %esi |
814 | sub $0xffff, %edx |
815 | jnz L(exit) |
816 | |
817 | lea (%ecx, %edi,1), %eax |
818 | lea 7(%ecx, %esi,1), %edx |
819 | POP (%edi) |
820 | POP (%esi) |
821 | jmp L(less48bytes) |
822 | # endif |
823 | |
824 | cfi_restore_state |
825 | cfi_remember_state |
826 | .p2align 4 |
827 | L(shr_8): |
828 | cmp $80, %ecx |
829 | lea -48(%ecx), %ecx |
830 | mov %edx, %eax |
831 | jae L(shr_8_gobble) |
832 | |
833 | movdqa 16(%esi), %xmm1 |
834 | movdqa %xmm1, %xmm2 |
835 | palignr $8,(%esi), %xmm1 |
836 | pcmpeqb (%edi), %xmm1 |
837 | |
838 | movdqa 32(%esi), %xmm3 |
839 | palignr $8,%xmm2, %xmm3 |
840 | pcmpeqb 16(%edi), %xmm3 |
841 | |
842 | pand %xmm1, %xmm3 |
843 | pmovmskb %xmm3, %edx |
844 | lea 32(%edi), %edi |
845 | lea 32(%esi), %esi |
846 | sub $0xffff, %edx |
847 | jnz L(exit) |
848 | lea (%ecx, %edi,1), %eax |
849 | lea 8(%ecx, %esi,1), %edx |
850 | POP (%edi) |
851 | POP (%esi) |
852 | jmp L(less48bytes) |
853 | |
854 | cfi_restore_state |
855 | cfi_remember_state |
856 | .p2align 4 |
857 | L(shr_8_gobble): |
858 | sub $32, %ecx |
859 | movdqa 16(%esi), %xmm0 |
860 | palignr $8,(%esi), %xmm0 |
861 | pcmpeqb (%edi), %xmm0 |
862 | |
863 | movdqa 32(%esi), %xmm3 |
864 | palignr $8,16(%esi), %xmm3 |
865 | pcmpeqb 16(%edi), %xmm3 |
866 | |
867 | L(shr_8_gobble_loop): |
868 | pand %xmm0, %xmm3 |
869 | sub $32, %ecx |
870 | pmovmskb %xmm3, %edx |
871 | movdqa %xmm0, %xmm1 |
872 | |
873 | movdqa 64(%esi), %xmm3 |
874 | palignr $8,48(%esi), %xmm3 |
875 | sbb $0xffff, %edx |
876 | movdqa 48(%esi), %xmm0 |
877 | palignr $8,32(%esi), %xmm0 |
878 | pcmpeqb 32(%edi), %xmm0 |
879 | lea 32(%esi), %esi |
880 | pcmpeqb 48(%edi), %xmm3 |
881 | |
882 | lea 32(%edi), %edi |
883 | jz L(shr_8_gobble_loop) |
884 | pand %xmm0, %xmm3 |
885 | |
886 | cmp $0, %ecx |
887 | jge L(shr_8_gobble_next) |
888 | inc %edx |
889 | add $32, %ecx |
890 | L(shr_8_gobble_next): |
891 | test %edx, %edx |
892 | jnz L(exit) |
893 | |
894 | pmovmskb %xmm3, %edx |
895 | movdqa %xmm0, %xmm1 |
896 | lea 32(%edi), %edi |
897 | lea 32(%esi), %esi |
898 | sub $0xffff, %edx |
899 | jnz L(exit) |
900 | |
901 | lea (%ecx, %edi,1), %eax |
902 | lea 8(%ecx, %esi,1), %edx |
903 | POP (%edi) |
904 | POP (%esi) |
905 | jmp L(less48bytes) |
906 | |
907 | # ifndef USE_AS_WMEMCMP |
908 | cfi_restore_state |
909 | cfi_remember_state |
910 | .p2align 4 |
911 | L(shr_9): |
912 | cmp $80, %ecx |
913 | lea -48(%ecx), %ecx |
914 | mov %edx, %eax |
915 | jae L(shr_9_gobble) |
916 | |
917 | movdqa 16(%esi), %xmm1 |
918 | movdqa %xmm1, %xmm2 |
919 | palignr $9,(%esi), %xmm1 |
920 | pcmpeqb (%edi), %xmm1 |
921 | |
922 | movdqa 32(%esi), %xmm3 |
923 | palignr $9,%xmm2, %xmm3 |
924 | pcmpeqb 16(%edi), %xmm3 |
925 | |
926 | pand %xmm1, %xmm3 |
927 | pmovmskb %xmm3, %edx |
928 | lea 32(%edi), %edi |
929 | lea 32(%esi), %esi |
930 | sub $0xffff, %edx |
931 | jnz L(exit) |
932 | lea (%ecx, %edi,1), %eax |
933 | lea 9(%ecx, %esi,1), %edx |
934 | POP (%edi) |
935 | POP (%esi) |
936 | jmp L(less48bytes) |
937 | |
938 | cfi_restore_state |
939 | cfi_remember_state |
940 | .p2align 4 |
941 | L(shr_9_gobble): |
942 | sub $32, %ecx |
943 | movdqa 16(%esi), %xmm0 |
944 | palignr $9,(%esi), %xmm0 |
945 | pcmpeqb (%edi), %xmm0 |
946 | |
947 | movdqa 32(%esi), %xmm3 |
948 | palignr $9,16(%esi), %xmm3 |
949 | pcmpeqb 16(%edi), %xmm3 |
950 | |
951 | L(shr_9_gobble_loop): |
952 | pand %xmm0, %xmm3 |
953 | sub $32, %ecx |
954 | pmovmskb %xmm3, %edx |
955 | movdqa %xmm0, %xmm1 |
956 | |
957 | movdqa 64(%esi), %xmm3 |
958 | palignr $9,48(%esi), %xmm3 |
959 | sbb $0xffff, %edx |
960 | movdqa 48(%esi), %xmm0 |
961 | palignr $9,32(%esi), %xmm0 |
962 | pcmpeqb 32(%edi), %xmm0 |
963 | lea 32(%esi), %esi |
964 | pcmpeqb 48(%edi), %xmm3 |
965 | |
966 | lea 32(%edi), %edi |
967 | jz L(shr_9_gobble_loop) |
968 | pand %xmm0, %xmm3 |
969 | |
970 | cmp $0, %ecx |
971 | jge L(shr_9_gobble_next) |
972 | inc %edx |
973 | add $32, %ecx |
974 | L(shr_9_gobble_next): |
975 | test %edx, %edx |
976 | jnz L(exit) |
977 | |
978 | pmovmskb %xmm3, %edx |
979 | movdqa %xmm0, %xmm1 |
980 | lea 32(%edi), %edi |
981 | lea 32(%esi), %esi |
982 | sub $0xffff, %edx |
983 | jnz L(exit) |
984 | |
985 | lea (%ecx, %edi,1), %eax |
986 | lea 9(%ecx, %esi,1), %edx |
987 | POP (%edi) |
988 | POP (%esi) |
989 | jmp L(less48bytes) |
990 | |
991 | cfi_restore_state |
992 | cfi_remember_state |
993 | .p2align 4 |
994 | L(shr_10): |
995 | cmp $80, %ecx |
996 | lea -48(%ecx), %ecx |
997 | mov %edx, %eax |
998 | jae L(shr_10_gobble) |
999 | |
1000 | movdqa 16(%esi), %xmm1 |
1001 | movdqa %xmm1, %xmm2 |
1002 | palignr $10, (%esi), %xmm1 |
1003 | pcmpeqb (%edi), %xmm1 |
1004 | |
1005 | movdqa 32(%esi), %xmm3 |
1006 | palignr $10,%xmm2, %xmm3 |
1007 | pcmpeqb 16(%edi), %xmm3 |
1008 | |
1009 | pand %xmm1, %xmm3 |
1010 | pmovmskb %xmm3, %edx |
1011 | lea 32(%edi), %edi |
1012 | lea 32(%esi), %esi |
1013 | sub $0xffff, %edx |
1014 | jnz L(exit) |
1015 | lea (%ecx, %edi,1), %eax |
1016 | lea 10(%ecx, %esi,1), %edx |
1017 | POP (%edi) |
1018 | POP (%esi) |
1019 | jmp L(less48bytes) |
1020 | |
1021 | cfi_restore_state |
1022 | cfi_remember_state |
1023 | .p2align 4 |
1024 | L(shr_10_gobble): |
1025 | sub $32, %ecx |
1026 | movdqa 16(%esi), %xmm0 |
1027 | palignr $10, (%esi), %xmm0 |
1028 | pcmpeqb (%edi), %xmm0 |
1029 | |
1030 | movdqa 32(%esi), %xmm3 |
1031 | palignr $10, 16(%esi), %xmm3 |
1032 | pcmpeqb 16(%edi), %xmm3 |
1033 | |
1034 | L(shr_10_gobble_loop): |
1035 | pand %xmm0, %xmm3 |
1036 | sub $32, %ecx |
1037 | pmovmskb %xmm3, %edx |
1038 | movdqa %xmm0, %xmm1 |
1039 | |
1040 | movdqa 64(%esi), %xmm3 |
1041 | palignr $10,48(%esi), %xmm3 |
1042 | sbb $0xffff, %edx |
1043 | movdqa 48(%esi), %xmm0 |
1044 | palignr $10,32(%esi), %xmm0 |
1045 | pcmpeqb 32(%edi), %xmm0 |
1046 | lea 32(%esi), %esi |
1047 | pcmpeqb 48(%edi), %xmm3 |
1048 | |
1049 | lea 32(%edi), %edi |
1050 | jz L(shr_10_gobble_loop) |
1051 | pand %xmm0, %xmm3 |
1052 | |
1053 | cmp $0, %ecx |
1054 | jge L(shr_10_gobble_next) |
1055 | inc %edx |
1056 | add $32, %ecx |
1057 | L(shr_10_gobble_next): |
1058 | test %edx, %edx |
1059 | jnz L(exit) |
1060 | |
1061 | pmovmskb %xmm3, %edx |
1062 | movdqa %xmm0, %xmm1 |
1063 | lea 32(%edi), %edi |
1064 | lea 32(%esi), %esi |
1065 | sub $0xffff, %edx |
1066 | jnz L(exit) |
1067 | |
1068 | lea (%ecx, %edi,1), %eax |
1069 | lea 10(%ecx, %esi,1), %edx |
1070 | POP (%edi) |
1071 | POP (%esi) |
1072 | jmp L(less48bytes) |
1073 | |
1074 | cfi_restore_state |
1075 | cfi_remember_state |
1076 | .p2align 4 |
1077 | L(shr_11): |
1078 | cmp $80, %ecx |
1079 | lea -48(%ecx), %ecx |
1080 | mov %edx, %eax |
1081 | jae L(shr_11_gobble) |
1082 | |
1083 | movdqa 16(%esi), %xmm1 |
1084 | movdqa %xmm1, %xmm2 |
1085 | palignr $11, (%esi), %xmm1 |
1086 | pcmpeqb (%edi), %xmm1 |
1087 | |
1088 | movdqa 32(%esi), %xmm3 |
1089 | palignr $11, %xmm2, %xmm3 |
1090 | pcmpeqb 16(%edi), %xmm3 |
1091 | |
1092 | pand %xmm1, %xmm3 |
1093 | pmovmskb %xmm3, %edx |
1094 | lea 32(%edi), %edi |
1095 | lea 32(%esi), %esi |
1096 | sub $0xffff, %edx |
1097 | jnz L(exit) |
1098 | lea (%ecx, %edi,1), %eax |
1099 | lea 11(%ecx, %esi,1), %edx |
1100 | POP (%edi) |
1101 | POP (%esi) |
1102 | jmp L(less48bytes) |
1103 | |
1104 | cfi_restore_state |
1105 | cfi_remember_state |
1106 | .p2align 4 |
1107 | L(shr_11_gobble): |
1108 | sub $32, %ecx |
1109 | movdqa 16(%esi), %xmm0 |
1110 | palignr $11, (%esi), %xmm0 |
1111 | pcmpeqb (%edi), %xmm0 |
1112 | |
1113 | movdqa 32(%esi), %xmm3 |
1114 | palignr $11, 16(%esi), %xmm3 |
1115 | pcmpeqb 16(%edi), %xmm3 |
1116 | |
1117 | L(shr_11_gobble_loop): |
1118 | pand %xmm0, %xmm3 |
1119 | sub $32, %ecx |
1120 | pmovmskb %xmm3, %edx |
1121 | movdqa %xmm0, %xmm1 |
1122 | |
1123 | movdqa 64(%esi), %xmm3 |
1124 | palignr $11,48(%esi), %xmm3 |
1125 | sbb $0xffff, %edx |
1126 | movdqa 48(%esi), %xmm0 |
1127 | palignr $11,32(%esi), %xmm0 |
1128 | pcmpeqb 32(%edi), %xmm0 |
1129 | lea 32(%esi), %esi |
1130 | pcmpeqb 48(%edi), %xmm3 |
1131 | |
1132 | lea 32(%edi), %edi |
1133 | jz L(shr_11_gobble_loop) |
1134 | pand %xmm0, %xmm3 |
1135 | |
1136 | cmp $0, %ecx |
1137 | jge L(shr_11_gobble_next) |
1138 | inc %edx |
1139 | add $32, %ecx |
1140 | L(shr_11_gobble_next): |
1141 | test %edx, %edx |
1142 | jnz L(exit) |
1143 | |
1144 | pmovmskb %xmm3, %edx |
1145 | movdqa %xmm0, %xmm1 |
1146 | lea 32(%edi), %edi |
1147 | lea 32(%esi), %esi |
1148 | sub $0xffff, %edx |
1149 | jnz L(exit) |
1150 | |
1151 | lea (%ecx, %edi,1), %eax |
1152 | lea 11(%ecx, %esi,1), %edx |
1153 | POP (%edi) |
1154 | POP (%esi) |
1155 | jmp L(less48bytes) |
1156 | # endif |
1157 | |
1158 | cfi_restore_state |
1159 | cfi_remember_state |
1160 | .p2align 4 |
1161 | L(shr_12): |
1162 | cmp $80, %ecx |
1163 | lea -48(%ecx), %ecx |
1164 | mov %edx, %eax |
1165 | jae L(shr_12_gobble) |
1166 | |
1167 | movdqa 16(%esi), %xmm1 |
1168 | movdqa %xmm1, %xmm2 |
1169 | palignr $12, (%esi), %xmm1 |
1170 | pcmpeqb (%edi), %xmm1 |
1171 | |
1172 | movdqa 32(%esi), %xmm3 |
1173 | palignr $12, %xmm2, %xmm3 |
1174 | pcmpeqb 16(%edi), %xmm3 |
1175 | |
1176 | pand %xmm1, %xmm3 |
1177 | pmovmskb %xmm3, %edx |
1178 | lea 32(%edi), %edi |
1179 | lea 32(%esi), %esi |
1180 | sub $0xffff, %edx |
1181 | jnz L(exit) |
1182 | lea (%ecx, %edi,1), %eax |
1183 | lea 12(%ecx, %esi,1), %edx |
1184 | POP (%edi) |
1185 | POP (%esi) |
1186 | jmp L(less48bytes) |
1187 | |
1188 | cfi_restore_state |
1189 | cfi_remember_state |
1190 | .p2align 4 |
1191 | L(shr_12_gobble): |
1192 | sub $32, %ecx |
1193 | movdqa 16(%esi), %xmm0 |
1194 | palignr $12, (%esi), %xmm0 |
1195 | pcmpeqb (%edi), %xmm0 |
1196 | |
1197 | movdqa 32(%esi), %xmm3 |
1198 | palignr $12, 16(%esi), %xmm3 |
1199 | pcmpeqb 16(%edi), %xmm3 |
1200 | |
1201 | L(shr_12_gobble_loop): |
1202 | pand %xmm0, %xmm3 |
1203 | sub $32, %ecx |
1204 | pmovmskb %xmm3, %edx |
1205 | movdqa %xmm0, %xmm1 |
1206 | |
1207 | movdqa 64(%esi), %xmm3 |
1208 | palignr $12,48(%esi), %xmm3 |
1209 | sbb $0xffff, %edx |
1210 | movdqa 48(%esi), %xmm0 |
1211 | palignr $12,32(%esi), %xmm0 |
1212 | pcmpeqb 32(%edi), %xmm0 |
1213 | lea 32(%esi), %esi |
1214 | pcmpeqb 48(%edi), %xmm3 |
1215 | |
1216 | lea 32(%edi), %edi |
1217 | jz L(shr_12_gobble_loop) |
1218 | pand %xmm0, %xmm3 |
1219 | |
1220 | cmp $0, %ecx |
1221 | jge L(shr_12_gobble_next) |
1222 | inc %edx |
1223 | add $32, %ecx |
1224 | L(shr_12_gobble_next): |
1225 | test %edx, %edx |
1226 | jnz L(exit) |
1227 | |
1228 | pmovmskb %xmm3, %edx |
1229 | movdqa %xmm0, %xmm1 |
1230 | lea 32(%edi), %edi |
1231 | lea 32(%esi), %esi |
1232 | sub $0xffff, %edx |
1233 | jnz L(exit) |
1234 | |
1235 | lea (%ecx, %edi,1), %eax |
1236 | lea 12(%ecx, %esi,1), %edx |
1237 | POP (%edi) |
1238 | POP (%esi) |
1239 | jmp L(less48bytes) |
1240 | |
1241 | # ifndef USE_AS_WMEMCMP |
1242 | cfi_restore_state |
1243 | cfi_remember_state |
1244 | .p2align 4 |
1245 | L(shr_13): |
1246 | cmp $80, %ecx |
1247 | lea -48(%ecx), %ecx |
1248 | mov %edx, %eax |
1249 | jae L(shr_13_gobble) |
1250 | |
1251 | movdqa 16(%esi), %xmm1 |
1252 | movdqa %xmm1, %xmm2 |
1253 | palignr $13, (%esi), %xmm1 |
1254 | pcmpeqb (%edi), %xmm1 |
1255 | |
1256 | movdqa 32(%esi), %xmm3 |
1257 | palignr $13, %xmm2, %xmm3 |
1258 | pcmpeqb 16(%edi), %xmm3 |
1259 | |
1260 | pand %xmm1, %xmm3 |
1261 | pmovmskb %xmm3, %edx |
1262 | lea 32(%edi), %edi |
1263 | lea 32(%esi), %esi |
1264 | sub $0xffff, %edx |
1265 | jnz L(exit) |
1266 | lea (%ecx, %edi,1), %eax |
1267 | lea 13(%ecx, %esi,1), %edx |
1268 | POP (%edi) |
1269 | POP (%esi) |
1270 | jmp L(less48bytes) |
1271 | |
1272 | cfi_restore_state /* unwind info: go back to the CFI state saved by the last cfi_remember_state */ |
1273 | cfi_remember_state /* and save it again for the path after this one */ |
1274 | .p2align 4 |
1275 | L(shr_13_gobble): /* Loop variant for a palignr shift of 13, entered from L(shr_13) with ecx already reduced by 48. Checks 32 bytes per iteration while the next 32 are being prepared. */ |
1276 | sub $32, %ecx |
1277 | movdqa 16(%esi), %xmm0 |
1278 | palignr $13, (%esi), %xmm0 /* xmm0 = bytes 13..28 of the 32 bytes at esi */ |
1279 | pcmpeqb (%edi), %xmm0 /* lane becomes 0xff where it equals the byte at edi */ |
1280 | |
1281 | movdqa 32(%esi), %xmm3 |
1282 | palignr $13, 16(%esi), %xmm3 /* xmm3 = bytes 13..28 of the 32 bytes at esi+16 */ |
1283 | pcmpeqb 16(%edi), %xmm3 |
1284 | |
1285 | L(shr_13_gobble_loop): /* on entry xmm0 and xmm3 hold the two compare results of the current 32 bytes */ |
1286 | pand %xmm0, %xmm3 /* a lane stays 0xff only if it matched in both halves */ |
1287 | sub $32, %ecx /* CF is set if this borrows, the sbb below folds it into edx */ |
1288 | pmovmskb %xmm3, %edx /* edx = one bit per matching lane */ |
1289 | movdqa %xmm0, %xmm1 /* xmm1 = first-half result, read by L(exit) */ |
1290 | |
1291 | movdqa 64(%esi), %xmm3 /* start preparing the following 32 bytes */ |
1292 | palignr $13,48(%esi), %xmm3 |
1293 | sbb $0xffff, %edx /* edx = edx - 0xffff - CF: zero only if every lane matched and the sub of ecx did not borrow */ |
1294 | movdqa 48(%esi), %xmm0 |
1295 | palignr $13,32(%esi), %xmm0 |
1296 | pcmpeqb 32(%edi), %xmm0 |
1297 | lea 32(%esi), %esi |
1298 | pcmpeqb 48(%edi), %xmm3 |
1299 | |
1300 | lea 32(%edi), %edi |
1301 | jz L(shr_13_gobble_loop) /* ZF is still the one from sbb, the SSE ops and lea in between do not write flags */ |
1302 | pand %xmm0, %xmm3 /* merge the already prepared results, as the loop top would have done */ |
1303 | |
1304 | cmp $0, %ecx |
1305 | jge L(shr_13_gobble_next) /* ecx not negative: no correction needed */ |
1306 | inc %edx /* negative ecx is treated as the borrow case: take back the extra 1 that sbb subtracted */ |
1307 | add $32, %ecx /* and undo the last subtraction from ecx */ |
1308 | L(shr_13_gobble_next): |
1309 | test %edx, %edx |
1310 | jnz L(exit) /* mismatch in the 32 bytes described by xmm1 and edx */ |
1311 | |
1312 | pmovmskb %xmm3, %edx /* otherwise check the 32 bytes that were prepared in advance */ |
1313 | movdqa %xmm0, %xmm1 /* their first-half result goes to xmm1 for L(exit) */ |
1314 | lea 32(%edi), %edi |
1315 | lea 32(%esi), %esi |
1316 | sub $0xffff, %edx /* zero only if all 16 bits were set */ |
1317 | jnz L(exit) |
1318 | |
1319 | lea (%ecx, %edi,1), %eax /* eax = edi + ecx */ |
1320 | lea 13(%ecx, %esi,1), %edx /* edx = esi + ecx + 13 */ |
1321 | POP (%edi) /* popl plus the matching CFI update, see the POP macro */ |
1322 | POP (%esi) |
1323 | jmp L(less48bytes) /* tail code dispatches on ecx and reads at negative offsets from eax and edx */ |
1324 | |
1325 | cfi_restore_state /* unwind info: go back to the CFI state saved by the last cfi_remember_state */ |
1326 | cfi_remember_state /* and save it again for the path after this one */ |
1327 | .p2align 4 |
1328 | L(shr_14): /* Compare path using a palignr shift of 14. Values of ecx below 80 are handled here, larger ones go to L(shr_14_gobble). */ |
1329 | cmp $80, %ecx /* flags are consumed by the jae three lines down */ |
1330 | lea -48(%ecx), %ecx /* ecx = ecx - 48, lea leaves the flags alone */ |
1331 | mov %edx, %eax /* keep the incoming edx in eax, L(exit) later adds eax to esi */ |
1332 | jae L(shr_14_gobble) /* ecx was at least 80 (unsigned) */ |
1333 | |
1334 | movdqa 16(%esi), %xmm1 /* aligned load of the 16 bytes at esi+16 */ |
1335 | movdqa %xmm1, %xmm2 /* keep a copy to serve as the low half of the second palignr */ |
1336 | palignr $14, (%esi), %xmm1 /* xmm1 = bytes 14..29 of the 32 bytes at esi */ |
1337 | pcmpeqb (%edi), %xmm1 /* each lane becomes 0xff where it equals the byte at edi, else 0 */ |
1338 | |
1339 | movdqa 32(%esi), %xmm3 |
1340 | palignr $14, %xmm2, %xmm3 /* xmm3 = bytes 14..29 of the 32 bytes at esi+16 */ |
1341 | pcmpeqb 16(%edi), %xmm3 /* same lane-wise compare against the 16 bytes at edi+16 */ |
1342 | |
1343 | pand %xmm1, %xmm3 /* a lane stays 0xff only if it matched in both halves */ |
1344 | pmovmskb %xmm3, %edx /* edx = one bit per lane, set where the lane is 0xff */ |
1345 | lea 32(%edi), %edi /* step both pointers past the 32 bytes just compared */ |
1346 | lea 32(%esi), %esi |
1347 | sub $0xffff, %edx /* zero only if all 16 bits were set */ |
1348 | jnz L(exit) /* a lane differed, L(exit) works out which one from xmm1 and edx */ |
1349 | lea (%ecx, %edi,1), %eax /* eax = edi + ecx */ |
1350 | lea 14(%ecx, %esi,1), %edx /* edx = esi + ecx + 14 */ |
1351 | POP (%edi) /* popl plus the matching CFI update, see the POP macro */ |
1352 | POP (%esi) |
1353 | jmp L(less48bytes) /* tail code dispatches on ecx and reads at negative offsets from eax and edx */ |
1354 | |
1355 | cfi_restore_state /* unwind info: go back to the CFI state saved by the last cfi_remember_state */ |
1356 | cfi_remember_state /* and save it again for the path after this one */ |
1357 | .p2align 4 |
1358 | L(shr_14_gobble): /* Loop variant for a palignr shift of 14, entered from L(shr_14) with ecx already reduced by 48. Checks 32 bytes per iteration while the next 32 are being prepared. */ |
1359 | sub $32, %ecx |
1360 | movdqa 16(%esi), %xmm0 |
1361 | palignr $14, (%esi), %xmm0 /* xmm0 = bytes 14..29 of the 32 bytes at esi */ |
1362 | pcmpeqb (%edi), %xmm0 /* lane becomes 0xff where it equals the byte at edi */ |
1363 | |
1364 | movdqa 32(%esi), %xmm3 |
1365 | palignr $14, 16(%esi), %xmm3 /* xmm3 = bytes 14..29 of the 32 bytes at esi+16 */ |
1366 | pcmpeqb 16(%edi), %xmm3 |
1367 | |
1368 | L(shr_14_gobble_loop): /* on entry xmm0 and xmm3 hold the two compare results of the current 32 bytes */ |
1369 | pand %xmm0, %xmm3 /* a lane stays 0xff only if it matched in both halves */ |
1370 | sub $32, %ecx /* CF is set if this borrows, the sbb below folds it into edx */ |
1371 | pmovmskb %xmm3, %edx /* edx = one bit per matching lane */ |
1372 | movdqa %xmm0, %xmm1 /* xmm1 = first-half result, read by L(exit) */ |
1373 | |
1374 | movdqa 64(%esi), %xmm3 /* start preparing the following 32 bytes */ |
1375 | palignr $14,48(%esi), %xmm3 |
1376 | sbb $0xffff, %edx /* edx = edx - 0xffff - CF: zero only if every lane matched and the sub of ecx did not borrow */ |
1377 | movdqa 48(%esi), %xmm0 |
1378 | palignr $14,32(%esi), %xmm0 |
1379 | pcmpeqb 32(%edi), %xmm0 |
1380 | lea 32(%esi), %esi |
1381 | pcmpeqb 48(%edi), %xmm3 |
1382 | |
1383 | lea 32(%edi), %edi |
1384 | jz L(shr_14_gobble_loop) /* ZF is still the one from sbb, the SSE ops and lea in between do not write flags */ |
1385 | pand %xmm0, %xmm3 /* merge the already prepared results, as the loop top would have done */ |
1386 | |
1387 | cmp $0, %ecx |
1388 | jge L(shr_14_gobble_next) /* ecx not negative: no correction needed */ |
1389 | inc %edx /* negative ecx is treated as the borrow case: take back the extra 1 that sbb subtracted */ |
1390 | add $32, %ecx /* and undo the last subtraction from ecx */ |
1391 | L(shr_14_gobble_next): |
1392 | test %edx, %edx |
1393 | jnz L(exit) /* mismatch in the 32 bytes described by xmm1 and edx */ |
1394 | |
1395 | pmovmskb %xmm3, %edx /* otherwise check the 32 bytes that were prepared in advance */ |
1396 | movdqa %xmm0, %xmm1 /* their first-half result goes to xmm1 for L(exit) */ |
1397 | lea 32(%edi), %edi |
1398 | lea 32(%esi), %esi |
1399 | sub $0xffff, %edx /* zero only if all 16 bits were set */ |
1400 | jnz L(exit) |
1401 | |
1402 | lea (%ecx, %edi,1), %eax /* eax = edi + ecx */ |
1403 | lea 14(%ecx, %esi,1), %edx /* edx = esi + ecx + 14 */ |
1404 | POP (%edi) /* popl plus the matching CFI update, see the POP macro */ |
1405 | POP (%esi) |
1406 | jmp L(less48bytes) /* tail code dispatches on ecx and reads at negative offsets from eax and edx */ |
1407 | |
1408 | cfi_restore_state /* unwind info: go back to the CFI state saved by the last cfi_remember_state */ |
1409 | cfi_remember_state /* and save it again for the path after this one */ |
1410 | .p2align 4 |
1411 | L(shr_15): /* Compare path using a palignr shift of 15. Values of ecx below 80 are handled here, larger ones go to L(shr_15_gobble). */ |
1412 | cmp $80, %ecx /* flags are consumed by the jae three lines down */ |
1413 | lea -48(%ecx), %ecx /* ecx = ecx - 48, lea leaves the flags alone */ |
1414 | mov %edx, %eax /* keep the incoming edx in eax, L(exit) later adds eax to esi */ |
1415 | jae L(shr_15_gobble) /* ecx was at least 80 (unsigned) */ |
1416 | |
1417 | movdqa 16(%esi), %xmm1 /* aligned load of the 16 bytes at esi+16 */ |
1418 | movdqa %xmm1, %xmm2 /* keep a copy to serve as the low half of the second palignr */ |
1419 | palignr $15, (%esi), %xmm1 /* xmm1 = bytes 15..30 of the 32 bytes at esi */ |
1420 | pcmpeqb (%edi), %xmm1 /* each lane becomes 0xff where it equals the byte at edi, else 0 */ |
1421 | |
1422 | movdqa 32(%esi), %xmm3 |
1423 | palignr $15, %xmm2, %xmm3 /* xmm3 = bytes 15..30 of the 32 bytes at esi+16 */ |
1424 | pcmpeqb 16(%edi), %xmm3 /* same lane-wise compare against the 16 bytes at edi+16 */ |
1425 | |
1426 | pand %xmm1, %xmm3 /* a lane stays 0xff only if it matched in both halves */ |
1427 | pmovmskb %xmm3, %edx /* edx = one bit per lane, set where the lane is 0xff */ |
1428 | lea 32(%edi), %edi /* step both pointers past the 32 bytes just compared */ |
1429 | lea 32(%esi), %esi |
1430 | sub $0xffff, %edx /* zero only if all 16 bits were set */ |
1431 | jnz L(exit) /* a lane differed, L(exit) works out which one from xmm1 and edx */ |
1432 | lea (%ecx, %edi,1), %eax /* eax = edi + ecx */ |
1433 | lea 15(%ecx, %esi,1), %edx /* edx = esi + ecx + 15 */ |
1434 | POP (%edi) /* popl plus the matching CFI update, see the POP macro */ |
1435 | POP (%esi) |
1436 | jmp L(less48bytes) /* tail code dispatches on ecx and reads at negative offsets from eax and edx */ |
1437 | |
1438 | cfi_restore_state /* unwind info: go back to the CFI state saved by the last cfi_remember_state */ |
1439 | cfi_remember_state /* and save it again for the path after this one */ |
1440 | .p2align 4 |
1441 | L(shr_15_gobble): /* Loop variant for a palignr shift of 15, entered from L(shr_15) with ecx already reduced by 48. Checks 32 bytes per iteration while the next 32 are being prepared. */ |
1442 | sub $32, %ecx |
1443 | movdqa 16(%esi), %xmm0 |
1444 | palignr $15, (%esi), %xmm0 /* xmm0 = bytes 15..30 of the 32 bytes at esi */ |
1445 | pcmpeqb (%edi), %xmm0 /* lane becomes 0xff where it equals the byte at edi */ |
1446 | |
1447 | movdqa 32(%esi), %xmm3 |
1448 | palignr $15, 16(%esi), %xmm3 /* xmm3 = bytes 15..30 of the 32 bytes at esi+16 */ |
1449 | pcmpeqb 16(%edi), %xmm3 |
1450 | |
1451 | L(shr_15_gobble_loop): /* on entry xmm0 and xmm3 hold the two compare results of the current 32 bytes */ |
1452 | pand %xmm0, %xmm3 /* a lane stays 0xff only if it matched in both halves */ |
1453 | sub $32, %ecx /* CF is set if this borrows, the sbb below folds it into edx */ |
1454 | pmovmskb %xmm3, %edx /* edx = one bit per matching lane */ |
1455 | movdqa %xmm0, %xmm1 /* xmm1 = first-half result, read by L(exit) */ |
1456 | |
1457 | movdqa 64(%esi), %xmm3 /* start preparing the following 32 bytes */ |
1458 | palignr $15,48(%esi), %xmm3 |
1459 | sbb $0xffff, %edx /* edx = edx - 0xffff - CF: zero only if every lane matched and the sub of ecx did not borrow */ |
1460 | movdqa 48(%esi), %xmm0 |
1461 | palignr $15,32(%esi), %xmm0 |
1462 | pcmpeqb 32(%edi), %xmm0 |
1463 | lea 32(%esi), %esi |
1464 | pcmpeqb 48(%edi), %xmm3 |
1465 | |
1466 | lea 32(%edi), %edi |
1467 | jz L(shr_15_gobble_loop) /* ZF is still the one from sbb, the SSE ops and lea in between do not write flags */ |
1468 | pand %xmm0, %xmm3 /* merge the already prepared results, as the loop top would have done */ |
1469 | |
1470 | cmp $0, %ecx |
1471 | jge L(shr_15_gobble_next) /* ecx not negative: no correction needed */ |
1472 | inc %edx /* negative ecx is treated as the borrow case: take back the extra 1 that sbb subtracted */ |
1473 | add $32, %ecx /* and undo the last subtraction from ecx */ |
1474 | L(shr_15_gobble_next): |
1475 | test %edx, %edx |
1476 | jnz L(exit) /* mismatch in the 32 bytes described by xmm1 and edx */ |
1477 | |
1478 | pmovmskb %xmm3, %edx /* otherwise check the 32 bytes that were prepared in advance */ |
1479 | movdqa %xmm0, %xmm1 /* their first-half result goes to xmm1 for L(exit) */ |
1480 | lea 32(%edi), %edi |
1481 | lea 32(%esi), %esi |
1482 | sub $0xffff, %edx /* zero only if all 16 bits were set */ |
1483 | jnz L(exit) |
1484 | |
1485 | lea (%ecx, %edi,1), %eax /* eax = edi + ecx */ |
1486 | lea 15(%ecx, %esi,1), %edx /* edx = esi + ecx + 15 */ |
1487 | POP (%edi) /* popl plus the matching CFI update, see the POP macro */ |
1488 | POP (%esi) |
1489 | jmp L(less48bytes) /* tail code dispatches on ecx and reads at negative offsets from eax and edx */ |
1490 | # endif |
1491 | |
1492 | cfi_restore_state /* unwind info: go back to the CFI state saved by the last cfi_remember_state */ |
1493 | cfi_remember_state /* and save it again, RETURN below restores it after each ret */ |
1494 | .p2align 4 |
1495 | L(exit): /* A 32-byte compare found a difference. The shr paths in view arrive with xmm1 = compare result of the first 16 bytes, edx = nonzero lane mask minus 0xffff, eax = value to add back to esi, and edi and esi already stepped past the 32 bytes. */ |
1496 | pmovmskb %xmm1, %ebx |
1497 | sub $0xffff, %ebx /* ebx is zero when all of the first 16 bytes matched */ |
1498 | jz L(first16bytes) /* first 16 matched: keep edx and the pointers as they are */ |
1499 | lea -16(%esi), %esi /* difference is in the first 16: move both pointers back by 16 */ |
1500 | lea -16(%edi), %edi |
1501 | mov %ebx, %edx /* and continue with the value computed for those 16 bytes */ |
1502 | |
1503 | L(first16bytes): |
1504 | add %eax, %esi /* esi = esi + eax (the shr paths put their incoming edx in eax) */ |
1505 | L(less16bytes): /* edx is a lane mask minus 0xffff, so its lowest set bit sits at the index of the first differing lane. The handlers read lane 0 at offset -16 from edi and esi. */ |
1506 | |
1507 | # ifndef USE_AS_WMEMCMP |
1508 | test %dl, %dl |
1509 | jz L(next_24_bytes) /* no bit set for lanes 0..7, look at dh instead */ |
1510 | |
1511 | test $0x01, %dl /* probe the bits from lowest to highest, the first one set wins */ |
1512 | jnz L(Byte16) |
1513 | |
1514 | test $0x02, %dl |
1515 | jnz L(Byte17) |
1516 | |
1517 | test $0x04, %dl |
1518 | jnz L(Byte18) |
1519 | |
1520 | test $0x08, %dl |
1521 | jnz L(Byte19) |
1522 | |
1523 | test $0x10, %dl |
1524 | jnz L(Byte20) |
1525 | |
1526 | test $0x20, %dl |
1527 | jnz L(Byte21) |
1528 | |
1529 | test $0x40, %dl |
1530 | jnz L(Byte22) |
1531 | L(Byte23): /* dl is nonzero and bits 0..6 are clear, so only bit 7 is left */ |
1532 | movzbl -9(%edi), %eax |
1533 | movzbl -9(%esi), %edx |
1534 | sub %edx, %eax /* result = zero-extended byte at edi side minus zero-extended byte at esi side */ |
1535 | RETURN /* pops edi, esi and ebx, returns, then resets the CFI state for the code that follows */ |
1536 | |
1537 | .p2align 4 |
1538 | L(Byte16): /* bit 0: bytes at offset -16. Byte16 to Byte22 are also reached from L(next_24_bytes) after the pointers moved up by 8. */ |
1539 | movzbl -16(%edi), %eax |
1540 | movzbl -16(%esi), %edx |
1541 | sub %edx, %eax |
1542 | RETURN |
1543 | |
1544 | .p2align 4 |
1545 | L(Byte17): /* bit 1: bytes at offset -15 */ |
1546 | movzbl -15(%edi), %eax |
1547 | movzbl -15(%esi), %edx |
1548 | sub %edx, %eax |
1549 | RETURN |
1550 | |
1551 | .p2align 4 |
1552 | L(Byte18): /* bit 2: bytes at offset -14 */ |
1553 | movzbl -14(%edi), %eax |
1554 | movzbl -14(%esi), %edx |
1555 | sub %edx, %eax |
1556 | RETURN |
1557 | |
1558 | .p2align 4 |
1559 | L(Byte19): /* bit 3: bytes at offset -13 */ |
1560 | movzbl -13(%edi), %eax |
1561 | movzbl -13(%esi), %edx |
1562 | sub %edx, %eax |
1563 | RETURN |
1564 | |
1565 | .p2align 4 |
1566 | L(Byte20): /* bit 4: bytes at offset -12 */ |
1567 | movzbl -12(%edi), %eax |
1568 | movzbl -12(%esi), %edx |
1569 | sub %edx, %eax |
1570 | RETURN |
1571 | |
1572 | .p2align 4 |
1573 | L(Byte21): /* bit 5: bytes at offset -11 */ |
1574 | movzbl -11(%edi), %eax |
1575 | movzbl -11(%esi), %edx |
1576 | sub %edx, %eax |
1577 | RETURN |
1578 | |
1579 | .p2align 4 |
1580 | L(Byte22): /* bit 6: bytes at offset -10 */ |
1581 | movzbl -10(%edi), %eax |
1582 | movzbl -10(%esi), %edx |
1583 | sub %edx, %eax |
1584 | RETURN |
1585 | |
1586 | .p2align 4 |
1587 | L(next_24_bytes): /* the difference is in lanes 8..15: advance both pointers by 8 so the Byte16..Byte22 handlers can be reused for the bits of dh */ |
1588 | lea 8(%edi), %edi |
1589 | lea 8(%esi), %esi |
1590 | test $0x01, %dh |
1591 | jnz L(Byte16) |
1592 | |
1593 | test $0x02, %dh |
1594 | jnz L(Byte17) |
1595 | |
1596 | test $0x04, %dh |
1597 | jnz L(Byte18) |
1598 | |
1599 | test $0x08, %dh |
1600 | jnz L(Byte19) |
1601 | |
1602 | test $0x10, %dh |
1603 | jnz L(Byte20) |
1604 | |
1605 | test $0x20, %dh |
1606 | jnz L(Byte21) |
1607 | |
1608 | test $0x40, %dh |
1609 | jnz L(Byte22) |
1610 | |
1611 | .p2align 4 |
1612 | L(Byte31): /* reached by fall-through when bits 0..6 of dh are clear: last byte of the 16 */ |
1613 | movzbl -9(%edi), %eax |
1614 | movzbl -9(%esi), %edx |
1615 | sub %edx, %eax |
1616 | RETURN_END /* same pops and ret as RETURN, without the CFI state reset */ |
1617 | # else |
1618 | |
1619 | /* special for wmemcmp: locate the differing 4-byte element and compare it as a signed value */ |
1620 | xor %eax, %eax |
1621 | test %dl, %dl |
1622 | jz L(next_two_double_words) /* lanes 0..7 all matched */ |
1623 | and $15, %dl |
1624 | jz L(second_double_word) /* lanes 0..3 matched, so look at the element at offset -12 */ |
1625 | mov -16(%edi), %eax |
1626 | cmp -16(%esi), %eax /* compare the whole 4-byte elements */ |
1627 | jne L(nequal) |
1628 | RETURN /* NOTE(review): if this were reached with equal elements eax would hold the loaded element, not 0. Presumably unreachable because edx already reported a difference in this element - confirm */ |
1629 | |
1630 | .p2align 4 |
1631 | L(second_double_word): |
1632 | mov -12(%edi), %eax |
1633 | cmp -12(%esi), %eax |
1634 | jne L(nequal) |
1635 | RETURN |
1636 | |
1637 | .p2align 4 |
1638 | L(next_two_double_words): |
1639 | and $15, %dh |
1640 | jz L(fourth_double_word) /* lanes 8..11 matched as well, only the last element is left */ |
1641 | mov -8(%edi), %eax |
1642 | cmp -8(%esi), %eax |
1643 | jne L(nequal) |
1644 | RETURN |
1645 | |
1646 | .p2align 4 |
1647 | L(fourth_double_word): |
1648 | mov -4(%edi), %eax |
1649 | cmp -4(%esi), %eax |
1650 | jne L(nequal) |
1651 | RETURN |
1652 | |
1653 | .p2align 4 |
1654 | L(nequal): /* flags are still those of the element cmp that jumped here */ |
1655 | mov $1, %eax /* mov does not change the flags */ |
1656 | jg L(nequal_bigger) /* edi-side element is greater as a signed value: return 1 */ |
1657 | neg %eax /* otherwise return -1 */ |
1658 | RETURN |
1659 | |
1660 | .p2align 4 |
1661 | L(nequal_bigger): |
1662 | RETURN_END |
1663 | # endif |
1664 | |
1665 | CFI_PUSH (%ebx) /* directive only: the tail code below runs with ebx as the one register still saved on the stack */ |
1666 | |
1667 | .p2align 4 |
1668 | L(more8bytes): /* Tail dispatch, entered from L(less48bytes) with ecx of 8 or more. Counts of 16 and up are passed along, the rest jump to the handler for the exact count. */ |
1669 | cmpl $16, %ecx |
1670 | jnb L(more16bytes) /* unsigned ecx is 16 or more */ |
1671 | cmpl $8, %ecx |
1672 | jz L(8bytes) |
1673 | # ifndef USE_AS_WMEMCMP |
1674 | cmpl $9, %ecx |
1675 | jz L(9bytes) |
1676 | cmpl $10, %ecx |
1677 | jz L(10bytes) |
1678 | cmpl $11, %ecx |
1679 | jz L(11bytes) |
1680 | cmpl $12, %ecx |
1681 | jz L(12bytes) |
1682 | cmpl $13, %ecx |
1683 | jz L(13bytes) |
1684 | cmpl $14, %ecx |
1685 | jz L(14bytes) |
1686 | jmp L(15bytes) /* 8 to 14 are ruled out, only 15 remains */ |
1687 | # else |
1688 | jmp L(12bytes) /* wmemcmp build: anything other than 8 goes to the 12-byte handler */ |
1689 | # endif |
1690 | |
1691 | .p2align 4 |
1692 | L(more16bytes): /* Tail dispatch for ecx of 16 or more: 24 and up are passed along, the rest jump to the handler for the exact count. */ |
1693 | cmpl $24, %ecx |
1694 | jnb L(more24bytes) /* unsigned ecx is 24 or more */ |
1695 | cmpl $16, %ecx |
1696 | jz L(16bytes) |
1697 | # ifndef USE_AS_WMEMCMP |
1698 | cmpl $17, %ecx |
1699 | jz L(17bytes) |
1700 | cmpl $18, %ecx |
1701 | jz L(18bytes) |
1702 | cmpl $19, %ecx |
1703 | jz L(19bytes) |
1704 | cmpl $20, %ecx |
1705 | jz L(20bytes) |
1706 | cmpl $21, %ecx |
1707 | jz L(21bytes) |
1708 | cmpl $22, %ecx |
1709 | jz L(22bytes) |
1710 | jmp L(23bytes) /* 16 to 22 are ruled out, only 23 remains */ |
1711 | # else |
1712 | jmp L(20bytes) /* wmemcmp build: anything other than 16 goes to the 20-byte handler */ |
1713 | # endif |
1714 | |
1715 | .p2align 4 |
1716 | L(more24bytes): /* Tail dispatch for ecx of 24 or more: 32 and up are passed along, the rest jump to the handler for the exact count. */ |
1717 | cmpl $32, %ecx |
1718 | jnb L(more32bytes) /* unsigned ecx is 32 or more */ |
1719 | cmpl $24, %ecx |
1720 | jz L(24bytes) |
1721 | # ifndef USE_AS_WMEMCMP |
1722 | cmpl $25, %ecx |
1723 | jz L(25bytes) |
1724 | cmpl $26, %ecx |
1725 | jz L(26bytes) |
1726 | cmpl $27, %ecx |
1727 | jz L(27bytes) |
1728 | cmpl $28, %ecx |
1729 | jz L(28bytes) |
1730 | cmpl $29, %ecx |
1731 | jz L(29bytes) |
1732 | cmpl $30, %ecx |
1733 | jz L(30bytes) |
1734 | jmp L(31bytes) /* 24 to 30 are ruled out, only 31 remains */ |
1735 | # else |
1736 | jmp L(28bytes) /* wmemcmp build: anything other than 24 goes to the 28-byte handler */ |
1737 | # endif |
1738 | |
1739 | .p2align 4 |
1740 | L(more32bytes): /* Tail dispatch for ecx of 32 or more: 40 and up are passed along, the rest jump to the handler for the exact count. */ |
1741 | cmpl $40, %ecx |
1742 | jnb L(more40bytes) /* unsigned ecx is 40 or more */ |
1743 | cmpl $32, %ecx |
1744 | jz L(32bytes) |
1745 | # ifndef USE_AS_WMEMCMP |
1746 | cmpl $33, %ecx |
1747 | jz L(33bytes) |
1748 | cmpl $34, %ecx |
1749 | jz L(34bytes) |
1750 | cmpl $35, %ecx |
1751 | jz L(35bytes) |
1752 | cmpl $36, %ecx |
1753 | jz L(36bytes) |
1754 | cmpl $37, %ecx |
1755 | jz L(37bytes) |
1756 | cmpl $38, %ecx |
1757 | jz L(38bytes) |
1758 | jmp L(39bytes) /* 32 to 38 are ruled out, only 39 remains */ |
1759 | # else |
1760 | jmp L(36bytes) /* wmemcmp build: anything other than 32 goes to the 36-byte handler */ |
1761 | # endif |
1762 | |
1763 | .p2align 4 |
1764 | L(less48bytes): /* Entry of the scalar tail code. ecx selects the handler, the handlers read at negative offsets from eax and edx. */ |
1765 | cmpl $8, %ecx |
1766 | jnb L(more8bytes) /* unsigned ecx is 8 or more */ |
1767 | # ifndef USE_AS_WMEMCMP |
1768 | cmpl $2, %ecx |
1769 | jz L(2bytes) |
1770 | cmpl $3, %ecx |
1771 | jz L(3bytes) |
1772 | cmpl $4, %ecx |
1773 | jz L(4bytes) |
1774 | cmpl $5, %ecx |
1775 | jz L(5bytes) |
1776 | cmpl $6, %ecx |
1777 | jz L(6bytes) |
1778 | jmp L(7bytes) /* every other value below 8 lands on the 7-byte handler */ |
1779 | # else |
1780 | jmp L(4bytes) /* wmemcmp build: every value below 8 lands on the 4-byte handler */ |
1781 | # endif |
1782 | |
1783 | .p2align 4 |
1784 | L(more40bytes): /* Tail dispatch for ecx of 40 or more, reached from L(more32bytes). */ |
1785 | cmp $40, %ecx |
1786 | je L(40bytes) |
1787 | # ifndef USE_AS_WMEMCMP |
1788 | cmp $41, %ecx |
1789 | je L(41bytes) |
1790 | cmp $42, %ecx |
1791 | je L(42bytes) |
1792 | cmp $43, %ecx |
1793 | je L(43bytes) |
1794 | cmp $44, %ecx |
1795 | je L(44bytes) |
1796 | cmp $45, %ecx |
1797 | je L(45bytes) |
1798 | cmp $46, %ecx |
1799 | je L(46bytes) |
1800 | jmp L(47bytes) /* 40 to 46 are ruled out, everything else goes to the 47-byte handler */ |
1801 | |
1802 | .p2align 4 |
1803 | L(44bytes): /* Chain for counts that are multiples of 4. Each label compares the 4 bytes that start that many bytes before eax and edx, then falls through into the next label. */ |
1804 | mov -44(%eax), %ecx |
1805 | mov -44(%edx), %ebx |
1806 | cmp %ebx, %ecx |
1807 | jne L(find_diff) /* L(find_diff) locates the first differing byte within ecx and ebx */ |
1808 | L(40bytes): |
1809 | mov -40(%eax), %ecx |
1810 | mov -40(%edx), %ebx |
1811 | cmp %ebx, %ecx |
1812 | jne L(find_diff) |
1813 | L(36bytes): |
1814 | mov -36(%eax), %ecx |
1815 | mov -36(%edx), %ebx |
1816 | cmp %ebx, %ecx |
1817 | jne L(find_diff) |
1818 | L(32bytes): |
1819 | mov -32(%eax), %ecx |
1820 | mov -32(%edx), %ebx |
1821 | cmp %ebx, %ecx |
1822 | jne L(find_diff) |
1823 | L(28bytes): |
1824 | mov -28(%eax), %ecx |
1825 | mov -28(%edx), %ebx |
1826 | cmp %ebx, %ecx |
1827 | jne L(find_diff) |
1828 | L(24bytes): |
1829 | mov -24(%eax), %ecx |
1830 | mov -24(%edx), %ebx |
1831 | cmp %ebx, %ecx |
1832 | jne L(find_diff) |
1833 | L(20bytes): |
1834 | mov -20(%eax), %ecx |
1835 | mov -20(%edx), %ebx |
1836 | cmp %ebx, %ecx |
1837 | jne L(find_diff) |
1838 | L(16bytes): |
1839 | mov -16(%eax), %ecx |
1840 | mov -16(%edx), %ebx |
1841 | cmp %ebx, %ecx |
1842 | jne L(find_diff) |
1843 | L(12bytes): |
1844 | mov -12(%eax), %ecx |
1845 | mov -12(%edx), %ebx |
1846 | cmp %ebx, %ecx |
1847 | jne L(find_diff) |
1848 | L(8bytes): |
1849 | mov -8(%eax), %ecx |
1850 | mov -8(%edx), %ebx |
1851 | cmp %ebx, %ecx |
1852 | jne L(find_diff) |
1853 | L(4bytes): |
1854 | mov -4(%eax), %ecx |
1855 | mov -4(%edx), %ebx |
1856 | cmp %ebx, %ecx |
1857 | mov $0, %eax /* return value 0 for the equal case. mov keeps the flags of the cmp for the jne, which is why no xor is used here */ |
1858 | jne L(find_diff) |
1859 | POP (%ebx) |
1860 | ret |
1861 | CFI_PUSH (%ebx) /* directive only: describe ebx as saved again for the code placed after this ret */ |
1862 | # else |
1863 | .p2align 4 |
1864 | L(44bytes): /* wmemcmp build of the same chain: each 4-byte element from the eax side is compared directly against memory at edx, and L(find_diff) there uses the flags of that cmp */ |
1865 | mov -44(%eax), %ecx |
1866 | cmp -44(%edx), %ecx |
1867 | jne L(find_diff) |
1868 | L(40bytes): |
1869 | mov -40(%eax), %ecx |
1870 | cmp -40(%edx), %ecx |
1871 | jne L(find_diff) |
1872 | L(36bytes): |
1873 | mov -36(%eax), %ecx |
1874 | cmp -36(%edx), %ecx |
1875 | jne L(find_diff) |
1876 | L(32bytes): |
1877 | mov -32(%eax), %ecx |
1878 | cmp -32(%edx), %ecx |
1879 | jne L(find_diff) |
1880 | L(28bytes): |
1881 | mov -28(%eax), %ecx |
1882 | cmp -28(%edx), %ecx |
1883 | jne L(find_diff) |
1884 | L(24bytes): |
1885 | mov -24(%eax), %ecx |
1886 | cmp -24(%edx), %ecx |
1887 | jne L(find_diff) |
1888 | L(20bytes): |
1889 | mov -20(%eax), %ecx |
1890 | cmp -20(%edx), %ecx |
1891 | jne L(find_diff) |
1892 | L(16bytes): |
1893 | mov -16(%eax), %ecx |
1894 | cmp -16(%edx), %ecx |
1895 | jne L(find_diff) |
1896 | L(12bytes): |
1897 | mov -12(%eax), %ecx |
1898 | cmp -12(%edx), %ecx |
1899 | jne L(find_diff) |
1900 | L(8bytes): |
1901 | mov -8(%eax), %ecx |
1902 | cmp -8(%edx), %ecx |
1903 | jne L(find_diff) |
1904 | L(4bytes): |
1905 | mov -4(%eax), %ecx |
1906 | xor %eax, %eax /* return value 0 for the equal case, done before the cmp because xor writes the flags */ |
1907 | cmp -4(%edx), %ecx |
1908 | jne L(find_diff) |
1909 | POP (%ebx) |
1910 | ret |
1911 | CFI_PUSH (%ebx) /* directive only: describe ebx as saved again for the code placed after this ret */ |
1912 | # endif |
1913 | |
1914 | # ifndef USE_AS_WMEMCMP |
1915 | |
1916 | .p2align 4 |
1917 | L(45bytes): /* Chain for counts of the form 4k+1. Each label compares the 4 bytes that start that many bytes before eax and edx and falls through, the final single byte is handled after L(5bytes). */ |
1918 | mov -45(%eax), %ecx |
1919 | mov -45(%edx), %ebx |
1920 | cmp %ebx, %ecx |
1921 | jne L(find_diff) /* L(find_diff) locates the first differing byte within ecx and ebx */ |
1922 | L(41bytes): |
1923 | mov -41(%eax), %ecx |
1924 | mov -41(%edx), %ebx |
1925 | cmp %ebx, %ecx |
1926 | jne L(find_diff) |
1927 | L(37bytes): |
1928 | mov -37(%eax), %ecx |
1929 | mov -37(%edx), %ebx |
1930 | cmp %ebx, %ecx |
1931 | jne L(find_diff) |
1932 | L(33bytes): |
1933 | mov -33(%eax), %ecx |
1934 | mov -33(%edx), %ebx |
1935 | cmp %ebx, %ecx |
1936 | jne L(find_diff) |
1937 | L(29bytes): |
1938 | mov -29(%eax), %ecx |
1939 | mov -29(%edx), %ebx |
1940 | cmp %ebx, %ecx |
1941 | jne L(find_diff) |
1942 | L(25bytes): |
1943 | mov -25(%eax), %ecx |
1944 | mov -25(%edx), %ebx |
1945 | cmp %ebx, %ecx |
1946 | jne L(find_diff) |
1947 | L(21bytes): |
1948 | mov -21(%eax), %ecx |
1949 | mov -21(%edx), %ebx |
1950 | cmp %ebx, %ecx |
1951 | jne L(find_diff) |
1952 | L(17bytes): |
1953 | mov -17(%eax), %ecx |
1954 | mov -17(%edx), %ebx |
1955 | cmp %ebx, %ecx |
1956 | jne L(find_diff) |
1957 | L(13bytes): |
1958 | mov -13(%eax), %ecx |
1959 | mov -13(%edx), %ebx |
1960 | cmp %ebx, %ecx |
1961 | jne L(find_diff) |
1962 | L(9bytes): |
1963 | mov -9(%eax), %ecx |
1964 | mov -9(%edx), %ebx |
1965 | cmp %ebx, %ecx |
1966 | jne L(find_diff) |
1967 | L(5bytes): |
1968 | mov -5(%eax), %ecx |
1969 | mov -5(%edx), %ebx |
1970 | cmp %ebx, %ecx |
1971 | jne L(find_diff) |
1972 | movzbl -1(%eax), %ecx /* the one remaining byte */ |
1973 | cmp -1(%edx), %cl |
1974 | mov $0, %eax /* return value 0 for the equal case, mov keeps the flags of the cmp */ |
1975 | jne L(end) /* L(end) turns the flags of the byte compare into 1 or -1 */ |
1976 | POP (%ebx) |
1977 | ret |
1978 | CFI_PUSH (%ebx) /* directive only: describe ebx as saved again for the code placed after this ret */ |
1979 | |
1980 | .p2align 4 |
1981 | L(46bytes): /* Chain for counts of the form 4k+2. Each label compares the 4 bytes that start that many bytes before eax and edx and falls through, L(2bytes) handles the last two bytes. */ |
1982 | mov -46(%eax), %ecx |
1983 | mov -46(%edx), %ebx |
1984 | cmp %ebx, %ecx |
1985 | jne L(find_diff) /* L(find_diff) locates the first differing byte within ecx and ebx */ |
1986 | L(42bytes): |
1987 | mov -42(%eax), %ecx |
1988 | mov -42(%edx), %ebx |
1989 | cmp %ebx, %ecx |
1990 | jne L(find_diff) |
1991 | L(38bytes): |
1992 | mov -38(%eax), %ecx |
1993 | mov -38(%edx), %ebx |
1994 | cmp %ebx, %ecx |
1995 | jne L(find_diff) |
1996 | L(34bytes): |
1997 | mov -34(%eax), %ecx |
1998 | mov -34(%edx), %ebx |
1999 | cmp %ebx, %ecx |
2000 | jne L(find_diff) |
2001 | L(30bytes): |
2002 | mov -30(%eax), %ecx |
2003 | mov -30(%edx), %ebx |
2004 | cmp %ebx, %ecx |
2005 | jne L(find_diff) |
2006 | L(26bytes): |
2007 | mov -26(%eax), %ecx |
2008 | mov -26(%edx), %ebx |
2009 | cmp %ebx, %ecx |
2010 | jne L(find_diff) |
2011 | L(22bytes): |
2012 | mov -22(%eax), %ecx |
2013 | mov -22(%edx), %ebx |
2014 | cmp %ebx, %ecx |
2015 | jne L(find_diff) |
2016 | L(18bytes): |
2017 | mov -18(%eax), %ecx |
2018 | mov -18(%edx), %ebx |
2019 | cmp %ebx, %ecx |
2020 | jne L(find_diff) |
2021 | L(14bytes): |
2022 | mov -14(%eax), %ecx |
2023 | mov -14(%edx), %ebx |
2024 | cmp %ebx, %ecx |
2025 | jne L(find_diff) |
2026 | L(10bytes): |
2027 | mov -10(%eax), %ecx |
2028 | mov -10(%edx), %ebx |
2029 | cmp %ebx, %ecx |
2030 | jne L(find_diff) |
2031 | L(6bytes): |
2032 | mov -6(%eax), %ecx |
2033 | mov -6(%edx), %ebx |
2034 | cmp %ebx, %ecx |
2035 | jne L(find_diff) |
2036 | L(2bytes): |
2037 | movzwl -2(%eax), %ecx /* the last two bytes of each side, zero-extended */ |
2038 | movzwl -2(%edx), %ebx |
2039 | cmp %bl, %cl /* byte at offset -2 first */ |
2040 | jne L(end) /* L(end) turns the flags of the byte compare into 1 or -1 */ |
2041 | cmp %bh, %ch /* then the byte at offset -1 */ |
2042 | mov $0, %eax /* return value 0 for the equal case, mov keeps the flags of the cmp */ |
2043 | jne L(end) |
2044 | POP (%ebx) |
2045 | ret |
2046 | CFI_PUSH (%ebx) /* directive only: describe ebx as saved again for the code placed after this ret */ |
2047 | |
2048 | .p2align 4 |
2049 | L(47bytes): /* Chain for counts of the form 4k+3. Each label compares the 4 bytes that start that many bytes before eax and edx and falls through, L(3bytes) handles the last three bytes. */ |
2050 | movl -47(%eax), %ecx |
2051 | movl -47(%edx), %ebx |
2052 | cmp %ebx, %ecx |
2053 | jne L(find_diff) /* L(find_diff) locates the first differing byte within ecx and ebx */ |
2054 | L(43bytes): |
2055 | movl -43(%eax), %ecx |
2056 | movl -43(%edx), %ebx |
2057 | cmp %ebx, %ecx |
2058 | jne L(find_diff) |
2059 | L(39bytes): |
2060 | movl -39(%eax), %ecx |
2061 | movl -39(%edx), %ebx |
2062 | cmp %ebx, %ecx |
2063 | jne L(find_diff) |
2064 | L(35bytes): |
2065 | movl -35(%eax), %ecx |
2066 | movl -35(%edx), %ebx |
2067 | cmp %ebx, %ecx |
2068 | jne L(find_diff) |
2069 | L(31bytes): |
2070 | movl -31(%eax), %ecx |
2071 | movl -31(%edx), %ebx |
2072 | cmp %ebx, %ecx |
2073 | jne L(find_diff) |
2074 | L(27bytes): |
2075 | movl -27(%eax), %ecx |
2076 | movl -27(%edx), %ebx |
2077 | cmp %ebx, %ecx |
2078 | jne L(find_diff) |
2079 | L(23bytes): |
2080 | movl -23(%eax), %ecx |
2081 | movl -23(%edx), %ebx |
2082 | cmp %ebx, %ecx |
2083 | jne L(find_diff) |
2084 | L(19bytes): |
2085 | movl -19(%eax), %ecx |
2086 | movl -19(%edx), %ebx |
2087 | cmp %ebx, %ecx |
2088 | jne L(find_diff) |
2089 | L(15bytes): |
2090 | movl -15(%eax), %ecx |
2091 | movl -15(%edx), %ebx |
2092 | cmp %ebx, %ecx |
2093 | jne L(find_diff) |
2094 | L(11bytes): |
2095 | movl -11(%eax), %ecx |
2096 | movl -11(%edx), %ebx |
2097 | cmp %ebx, %ecx |
2098 | jne L(find_diff) |
2099 | L(7bytes): |
2100 | movl -7(%eax), %ecx |
2101 | movl -7(%edx), %ebx |
2102 | cmp %ebx, %ecx |
2103 | jne L(find_diff) |
2104 | L(3bytes): |
2105 | movzwl -3(%eax), %ecx /* bytes at offsets -3 and -2 of each side, zero-extended */ |
2106 | movzwl -3(%edx), %ebx |
2107 | cmpb %bl, %cl /* byte at offset -3 first */ |
2108 | jne L(end) /* L(end) turns the flags of the compare into 1 or -1 */ |
2109 | cmp %bx, %cx /* the low bytes are equal here, so this 16-bit compare is decided by the byte at offset -2 */ |
2110 | jne L(end) |
2111 | movzbl -1(%eax), %eax /* eax is free to reuse as scratch, it is overwritten with the result below */ |
2112 | cmpb -1(%edx), %al /* finally the byte at offset -1 */ |
2113 | mov $0, %eax /* return value 0 for the equal case, mov keeps the flags of the cmpb */ |
2114 | jne L(end) |
2115 | POP (%ebx) |
2116 | ret |
2117 | CFI_PUSH (%ebx) /* directive only: describe ebx as saved again for the code placed after this ret */ |
2118 | |
2119 | .p2align 4 |
2120 | L(find_diff): /* ecx (loaded via eax) and ebx (loaded via edx) hold two 4-byte words that differ. Compare them byte by byte starting at the lowest address and hand the flags of the first unequal compare to L(end). */ |
2121 | cmpb %bl, %cl /* byte 0 */ |
2122 | jne L(end) |
2123 | cmp %bx, %cx /* byte 0 is equal here, so this 16-bit compare is decided by byte 1 */ |
2124 | jne L(end) |
2125 | shr $16,%ecx /* bring bytes 2 and 3 down into the low half */ |
2126 | shr $16,%ebx |
2127 | cmp %bl, %cl /* byte 2 */ |
2128 | jne L(end) |
2129 | cmp %bx, %cx /* byte 3 decides, execution falls through into L(end) */ |
2130 | |
2131 | .p2align 4 |
2132 | L(end): /* On entry the flags come from an unsigned compare of the eax-side value against the edx-side value. Returns 1 or -1. */ |
2133 | POP (%ebx) /* popl does not change the flags */ |
2134 | mov $1, %eax /* neither does mov */ |
2135 | ja L(bigger) /* eax-side value is above (unsigned): return 1 */ |
2136 | neg %eax /* otherwise return -1 */ |
2137 | L(bigger): |
2138 | ret |
2139 | # else |
2140 | |
2141 | /* for wmemcmp: turn the flags of the element cmp that jumped here into 1 or -1, using a signed test */ |
2142 | .p2align 4 |
2143 | L(find_diff): |
2144 | POP (%ebx) /* popl does not change the flags */ |
2145 | mov $1, %eax /* neither does mov */ |
2146 | jg L(find_diff_bigger) /* eax-side element is greater as a signed value: return 1 */ |
2147 | neg %eax /* otherwise return -1 */ |
2148 | ret |
2149 | |
2150 | .p2align 4 |
2151 | L(find_diff_bigger): |
2152 | ret |
2153 | |
2154 | # endif |
2155 | END (MEMCMP) |
2156 | #endif |
2157 | |