/* memcmp with SSE4.2, wmemcmp with SSE4.2
   Copyright (C) 2010-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
#if IS_IN (libc)

# include <sysdep.h>

/* Symbol name of the routine defined below.  The definition is guarded
   by #ifndef, so a name supplied from outside takes precedence over the
   default.  */
# ifndef MEMCMP
#  define MEMCMP	__memcmp_sse4_2
# endif
26
/* Unwind annotation that accompanies a 4-byte push of REG: the CFA
   offset is adjusted by +4 and REG is recorded at relative offset 0.
   (cfi_adjust_cfa_offset and cfi_rel_offset are not defined in this
   file; presumably they come from <sysdep.h> and wrap the assembler
   .cfi_* directives.)  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind annotation that accompanies a 4-byte pop of REG: the CFA
   offset is adjusted by -4 and REG is marked as restored.  */
# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl / popl of REG together with the matching unwind annotation.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)
37
/* Offsets of the three stack arguments relative to ESP at function
   entry, i.e. before anything has been pushed.  The slot at 0(%esp) is
   skipped (presumably the return address), so the arguments sit at
   4, 8 and 12.  */
# define PARMS	4
# define BLK1	PARMS
# define BLK2	BLK1 + 4
# define LEN	BLK2 + 4

/* Common exit for paths that run with EBX saved on the stack: pop EBX
   and return.  The CFI_PUSH placed after the ret emits no instruction;
   it re-establishes the "EBX is pushed" unwind state for the code that
   follows the RETURN in the file.  */
# define RETURN	POP (%ebx); ret; CFI_PUSH (%ebx)
43
44
# ifdef PIC
/* PIC build: a jump table entry is the distance from the table base B
   to the target label I.  */
#  define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  INDEX is a register that contains
   the index into the jump table.  SCALE is the scale of INDEX.
   EBX is clobbered.  */

#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
/* We first load PC into EBX.  */	\
	SETUP_PIC_REG(bx);	\
/* Get the address of the jump table.  */	\
	addl	$(TABLE - .), %ebx;	\
/* Get the entry and convert the relative offset to the	\
   absolute address.  */	\
	addl	(%ebx,INDEX,SCALE), %ebx;	\
/* EBX now holds the absolute address of the target.  Go.  */	\
	jmp	*%ebx
# else
/* Non-PIC build: a jump table entry is the address of label I itself;
   the table base B is unused.  */
#  define JMPTBL(I, B)	I

/* Branch through an entry of the jump table TABLE.  The entries are
   absolute addresses, so a single indirect jump is enough and no
   register is modified.  INDEX is a register that contains the index
   into the jump table.  SCALE is the scale of INDEX.  */
#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
	jmp	*TABLE(,INDEX,SCALE)
# endif
71
72
/* Warning!
      wmemcmp has to use SIGNED comparison for elements.
      memcmp has to use UNSIGNED comparison for elements.
*/
77
/* Entry point and size dispatch.

   Arguments are read from the stack at BLK1, BLK2 and LEN.  Register
   roles established here and relied on by the rest of the routine:
     EAX  - pointer into the first block
     EDX  - pointer into the second block
     ECX  - length in bytes
     XMM0 - all zero, used as the reference operand for ptest
     EBX  - scratch; the incoming value is saved with PUSH and restored
	    on the exit paths
   The result is returned in EAX.  */
	.section .text.sse4.2,"ax",@progbits
ENTRY (MEMCMP)
	movl	BLK1(%esp), %eax
	movl	BLK2(%esp), %edx
	movl	LEN(%esp), %ecx

# ifdef USE_AS_WMEMCMP
	/* Scale the count by 4 (presumably the element size) so that ECX
	   is a byte count from here on; a zero count returns 0.  */
	shl	$2, %ecx
	test	%ecx, %ecx
	jz	L(return0)
# else
	/* Lengths 0 and 1 are handled separately.  L(less1bytes) still
	   uses the flags of this cmp to tell the two apart.  */
	cmp	$1, %ecx
	jbe	L(less1bytes)
# endif

	pxor	%xmm0, %xmm0
	cmp	$64, %ecx
	ja	L(64bytesormore)
	cmp	$8, %ecx

	/* The flags of "cmp $8" are consumed by the jb below; the pushl
	   inside PUSH does not change them.  The memcmp L(less8bytes)
	   uses BL and therefore runs with EBX saved; the wmemcmp
	   L(less8bytes) does not touch EBX, so the branch is taken before
	   the push.  */
# ifndef USE_AS_WMEMCMP
	PUSH (%ebx)
	jb	L(less8bytes)
# else
	jb	L(less8bytes)
	PUSH (%ebx)
# endif

	/* 8 <= ECX <= 64: advance both pointers to the end of the blocks
	   and jump to the handler for exactly ECX bytes, which addresses
	   the data with negative offsets.  */
	add	%ecx, %edx
	add	%ecx, %eax
	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
109
# ifndef USE_AS_WMEMCMP
/* memcmp, 2 <= ECX <= 7 (0 and 1 were diverted at entry, 8 and more
   went elsewhere).  EAX/EDX still point at the start of the blocks and
   EBX has been pushed.  Bytes are compared one at a time in address
   order; after each byte from index 1 on, the length is checked to see
   whether that was the last one.  */
	.p2align 4
L(less8bytes):
	mov	(%eax), %bl
	cmpb	(%edx), %bl
	jne	L(nonzero)

	mov	1(%eax), %bl
	cmpb	1(%edx), %bl
	jne	L(nonzero)

	cmp	$2, %ecx
	jz	L(0bytes)

	mov	2(%eax), %bl
	cmpb	2(%edx), %bl
	jne	L(nonzero)

	cmp	$3, %ecx
	jz	L(0bytes)

	mov	3(%eax), %bl
	cmpb	3(%edx), %bl
	jne	L(nonzero)

	cmp	$4, %ecx
	jz	L(0bytes)

	mov	4(%eax), %bl
	cmpb	4(%edx), %bl
	jne	L(nonzero)

	cmp	$5, %ecx
	jz	L(0bytes)

	mov	5(%eax), %bl
	cmpb	5(%edx), %bl
	jne	L(nonzero)

	cmp	$6, %ecx
	jz	L(0bytes)

	/* Seventh and last possible byte: equal means the blocks match,
	   otherwise fall through to L(nonzero).  */
	mov	6(%eax), %bl
	cmpb	6(%edx), %bl
	je	L(0bytes)

/* A byte differed.  The flags still come from the cmpb (first-block
   byte minus second-block byte); neither the popl nor the mov changes
   them.  Unsigned "above" returns 1, otherwise -1.  */
L(nonzero):
	POP (%ebx)
	mov	$1, %eax
	ja	L(above)
	neg	%eax
L(above):
	ret
	/* No instruction: put the unwind state back to "EBX pushed" for
	   the code that follows.  */
	CFI_PUSH (%ebx)
# endif
165
/* Nothing left to compare and no difference found: restore EBX and
   return 0.  This is jump table entry 0 and, in the memcmp build, also
   the "all equal" exit of L(less8bytes).  */
	.p2align 4
L(0bytes):
	POP (%ebx)
	xor	%eax, %eax
	ret
171
# ifdef USE_AS_WMEMCMP

/* for wmemcmp, case N == 1 */

/* The byte count is nonzero and below 8 here, so one 4-byte element is
   compared.  EBX has not been pushed on this path (the entry code
   branches before its PUSH), hence the plain ret instructions.  The
   comparison is signed: jg selects the +1 result, otherwise -1.  */
	.p2align 4
L(less8bytes):
	mov	(%eax), %ecx
	cmp	(%edx), %ecx
	je	L(return0)
	/* mov leaves the flags of the cmp intact for the jg.  */
	mov	$1, %eax
	jg	L(find_diff_bigger)
	neg	%eax
	ret

	.p2align 4
L(find_diff_bigger):
	ret

/* Return 0 without touching the stack; used for a zero count and for an
   equal single element.  */
	.p2align 4
L(return0):
	xor	%eax, %eax
	ret
# endif
195
# ifndef USE_AS_WMEMCMP
/* memcmp with a length of 0 or 1.  The flags are still those of the
   "cmp $1, %ecx" at entry: "below" means the length is 0.  For one byte
   the result is the difference of the two zero-extended bytes.  EBX
   has not been pushed on this path.  */
	.p2align 4
L(less1bytes):
	jb	L(0bytesend)
	movzbl	(%eax), %eax
	movzbl	(%edx), %edx
	sub	%edx, %eax
	ret

	.p2align 4
L(0bytesend):
	xor	%eax, %eax
	ret
# endif
/* More than 64 bytes.  The blocks are compared 64 bytes per iteration
   as four unaligned 16-byte chunks.

   Inside the loop ECX is the constant 64 and EBX + 64 is the number of
   bytes not yet compared.  For each chunk the two 16-byte loads are
   XORed; ptest against the all-zero XMM0 sets CF only when the XOR
   result is zero, so "jnc" is taken when the chunks differ.  */
	.p2align 4
L(64bytesormore):
	PUSH (%ebx)
	mov	%ecx, %ebx
	mov	$64, %ecx
	sub	$64, %ebx
L(64bytesormore_loop):
	movdqu	(%eax), %xmm1
	movdqu	(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(find_16diff)

	movdqu	16(%eax), %xmm1
	movdqu	16(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(find_32diff)

	movdqu	32(%eax), %xmm1
	movdqu	32(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(find_48diff)

	movdqu	48(%eax), %xmm1
	movdqu	48(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(find_64diff)
	/* 64 equal bytes: step both pointers and keep looping while the
	   subtraction does not borrow, i.e. while at least 64 more bytes
	   remain.  */
	add	%ecx, %eax
	add	%ecx, %edx
	sub	%ecx, %ebx
	jae	L(64bytesormore_loop)
	/* EBX went negative; ECX = 64 + EBX is the number of bytes left
	   (0..63).  Move both pointers to the end of the blocks and let
	   the jump table handle the rest.  */
	add	%ebx, %ecx
	add	%ecx, %edx
	add	%ecx, %eax
	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
248
# ifdef USE_AS_WMEMCMP

/* This label is needed only to fill the table_64bytes slots whose byte
   count is not a multiple of 4.  */
L(unreal_case):
/* no code here */

# endif
/* Reached from the 64-byte loop with ECX = 64 and EAX/EDX at the start
   of the current 64-byte group, when the first, second, third or fourth
   16-byte chunk differs.  Each label takes 16 off ECX, so ECX ends up
   as the offset just past the differing chunk (16, 32, 48 or 64).  The
   pointers are moved there and execution falls through into
   L(16bytes), which examines the 16 bytes in front of them.  */
	.p2align 4
L(find_16diff):
	sub	$16, %ecx
L(find_32diff):
	sub	$16, %ecx
L(find_48diff):
	sub	$16, %ecx
L(find_64diff):
	add	%ecx, %edx
	add	%ecx, %eax
266
/* Handlers for 16, 12, 8 and 4 remaining bytes; each label falls
   through into the next smaller one.  EAX/EDX point just past the data,
   which is compared one 32-bit word at a time in address order.

   In the last step "mov $0, %eax" is placed between the cmp and the
   jne: mov does not change flags, so EAX is already 0 for RETURN when
   the final words are equal.

   The memcmp build loads both words into ECX/EBX because its
   L(find_diff) inspects them byte by byte; the wmemcmp build compares
   against memory, since its L(find_diff) needs only the flags.  */
# ifndef USE_AS_WMEMCMP
	.p2align 4
L(16bytes):
	mov	-16(%eax), %ecx
	mov	-16(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(12bytes):
	mov	-12(%eax), %ecx
	mov	-12(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(8bytes):
	mov	-8(%eax), %ecx
	mov	-8(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(4bytes):
	mov	-4(%eax), %ecx
	mov	-4(%edx), %ebx
	cmp	%ebx, %ecx
	mov	$0, %eax
	jne	L(find_diff)
	RETURN
# else
	.p2align 4
L(16bytes):
	mov	-16(%eax), %ecx
	cmp	-16(%edx), %ecx
	jne	L(find_diff)
L(12bytes):
	mov	-12(%eax), %ecx
	cmp	-12(%edx), %ecx
	jne	L(find_diff)
L(8bytes):
	mov	-8(%eax), %ecx
	cmp	-8(%edx), %ecx
	jne	L(find_diff)
L(4bytes):
	mov	-4(%eax), %ecx
	cmp	-4(%edx), %ecx
	mov	$0, %eax
	jne	L(find_diff)
	RETURN
# endif
312
# ifndef USE_AS_WMEMCMP
/* memcmp-only handlers for byte counts of the form 4k+1, 4k+2 and 4k+3
   up to 51.  Common conventions on entry to every label:
     - EAX/EDX point just past the end of the blocks, so the data is
       addressed with negative offsets;
     - XMM0 is zero and EBX has been pushed;
     - each label compares its leading piece and falls through into
       the label for the remaining count.
   16-byte pieces are XORed and checked with ptest (CF clear means they
   differ); EBX is loaded with the offset of that piece so that
   L(less16bytes) can locate the difference.  4-byte pieces go to
   L(find_diff) with the two words in ECX/EBX.  The final 1 to 3 bytes
   are compared byte by byte and go straight to L(end) with the flags of
   the deciding comparison.  */

/* 49 -> 33 -> 17 -> 13 -> 9 -> 5, then the last single byte.  */
	.p2align 4
L(49bytes):
	movdqu	-49(%eax), %xmm1
	movdqu	-49(%edx), %xmm2
	mov	$-49, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(33bytes):
	movdqu	-33(%eax), %xmm1
	movdqu	-33(%edx), %xmm2
	mov	$-33, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(17bytes):
	mov	-17(%eax), %ecx
	mov	-17(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(13bytes):
	mov	-13(%eax), %ecx
	mov	-13(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(9bytes):
	mov	-9(%eax), %ecx
	mov	-9(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(5bytes):
	mov	-5(%eax), %ecx
	mov	-5(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	/* Last byte.  EAX is zeroed after the cmp; mov keeps the flags.  */
	movzbl	-1(%eax), %ecx
	cmp	-1(%edx), %cl
	mov	$0, %eax
	jne	L(end)
	RETURN

/* 50 -> 34 -> 18 -> 14 -> 10 -> 6 -> 2.  */
	.p2align 4
L(50bytes):
	mov	$-50, %ebx
	movdqu	-50(%eax), %xmm1
	movdqu	-50(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(34bytes):
	mov	$-34, %ebx
	movdqu	-34(%eax), %xmm1
	movdqu	-34(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(18bytes):
	mov	-18(%eax), %ecx
	mov	-18(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(14bytes):
	mov	-14(%eax), %ecx
	mov	-14(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(10bytes):
	mov	-10(%eax), %ecx
	mov	-10(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(6bytes):
	mov	-6(%eax), %ecx
	mov	-6(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(2bytes):
	/* Two bytes loaded as one word: the low byte (lower address) is
	   checked first, then the high byte.  */
	movzwl	-2(%eax), %ecx
	movzwl	-2(%edx), %ebx
	cmp	%bl, %cl
	jne	L(end)
	cmp	%bh, %ch
	mov	$0, %eax
	jne	L(end)
	RETURN

/* 51 -> 35 -> 19 -> 15 -> 11 -> 7 -> 3 -> 1.  */
	.p2align 4
L(51bytes):
	mov	$-51, %ebx
	movdqu	-51(%eax), %xmm1
	movdqu	-51(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(35bytes):
	mov	$-35, %ebx
	movdqu	-35(%eax), %xmm1
	movdqu	-35(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(19bytes):
	movl	-19(%eax), %ecx
	movl	-19(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(15bytes):
	movl	-15(%eax), %ecx
	movl	-15(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(11bytes):
	movl	-11(%eax), %ecx
	movl	-11(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(7bytes):
	movl	-7(%eax), %ecx
	movl	-7(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(3bytes):
	/* Bytes -3 and -2 as one word: low byte first; once the low bytes
	   are known to be equal the word compare is decided by the high
	   byte.  */
	movzwl	-3(%eax), %ecx
	movzwl	-3(%edx), %ebx
	cmpb	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
	jne	L(end)
L(1bytes):
	/* EAX is used as scratch for the last byte and then zeroed; the
	   flags of the cmpb survive the mov.  */
	movzbl	-1(%eax), %eax
	cmpb	-1(%edx), %al
	mov	$0, %eax
	jne	L(end)
	RETURN
# endif
/* Handlers for 52, 36 and 20 remaining bytes (both builds); each label
   falls through into the next.  EAX/EDX point just past the data, XMM0
   is zero and EBX has been pushed.  Each 16-byte piece is XORed and
   checked with ptest (CF clear means it differs); EBX carries the
   offset of the piece for L(less16bytes).  The closing 4 bytes are
   compared as one word, with EAX zeroed between the cmp and the jne
   (mov keeps the flags).  */
	.p2align 4
L(52bytes):
	movdqu	-52(%eax), %xmm1
	movdqu	-52(%edx), %xmm2
	mov	$-52, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(36bytes):
	movdqu	-36(%eax), %xmm1
	movdqu	-36(%edx), %xmm2
	mov	$-36, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(20bytes):
	movdqu	-20(%eax), %xmm1
	movdqu	-20(%edx), %xmm2
	mov	$-20, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	mov	-4(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-4(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-4(%edx), %ecx
# endif
	mov	$0, %eax
	jne	L(find_diff)
	RETURN
481
# ifndef USE_AS_WMEMCMP
/* memcmp-only handlers for 53/37/21, 54/38/22 and 55/39/23 remaining
   bytes.  EAX/EDX point just past the data, XMM0 is zero and EBX has
   been pushed.  Within a chain each label compares one 16-byte piece
   (pxor + ptest, CF clear means it differs, EBX = offset of the piece
   for L(less16bytes)) and falls through; the last label then finishes
   with one 4-byte word (to L(find_diff)) and the trailing 1, 2 or 3
   bytes (to L(end)).  */

/* 53 -> 37 -> 21, then one word and one byte.  */
	.p2align 4
L(53bytes):
	movdqu	-53(%eax), %xmm1
	movdqu	-53(%edx), %xmm2
	mov	$-53, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(37bytes):
	mov	$-37, %ebx
	movdqu	-37(%eax), %xmm1
	movdqu	-37(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(21bytes):
	mov	$-21, %ebx
	movdqu	-21(%eax), %xmm1
	movdqu	-21(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	mov	-5(%eax), %ecx
	mov	-5(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	movzbl	-1(%eax), %ecx
	cmp	-1(%edx), %cl
	mov	$0, %eax
	jne	L(end)
	RETURN

/* 54 -> 38 -> 22, then one word and two bytes.  */
	.p2align 4
L(54bytes):
	movdqu	-54(%eax), %xmm1
	movdqu	-54(%edx), %xmm2
	mov	$-54, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(38bytes):
	mov	$-38, %ebx
	movdqu	-38(%eax), %xmm1
	movdqu	-38(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(22bytes):
	mov	$-22, %ebx
	movdqu	-22(%eax), %xmm1
	movdqu	-22(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	mov	-6(%eax), %ecx
	mov	-6(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	/* Last two bytes: low byte (lower address) first, then the high
	   byte.  */
	movzwl	-2(%eax), %ecx
	movzwl	-2(%edx), %ebx
	cmp	%bl, %cl
	jne	L(end)
	cmp	%bh, %ch
	mov	$0, %eax
	jne	L(end)
	RETURN

/* 55 -> 39 -> 23, then one word and three bytes.  */
	.p2align 4
L(55bytes):
	movdqu	-55(%eax), %xmm1
	movdqu	-55(%edx), %xmm2
	mov	$-55, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(39bytes):
	mov	$-39, %ebx
	movdqu	-39(%eax), %xmm1
	movdqu	-39(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(23bytes):
	mov	$-23, %ebx
	movdqu	-23(%eax), %xmm1
	movdqu	-23(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	movl	-7(%eax), %ecx
	movl	-7(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	/* Bytes -3 and -2 as one word (low byte first), then byte -1 with
	   EAX as scratch.  */
	movzwl	-3(%eax), %ecx
	movzwl	-3(%edx), %ebx
	cmpb	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
	jne	L(end)
	movzbl	-1(%eax), %eax
	cmpb	-1(%edx), %al
	mov	$0, %eax
	jne	L(end)
	RETURN
# endif
/* Handlers for 56, 40 and 24 remaining bytes (both builds); each label
   falls through into the next.  EAX/EDX point just past the data, XMM0
   is zero and EBX has been pushed.  16-byte pieces are checked with
   pxor + ptest (CF clear means the piece differs, EBX = its offset for
   L(less16bytes)); the closing 8 bytes are compared as two words.  */
	.p2align 4
L(56bytes):
	movdqu	-56(%eax), %xmm1
	movdqu	-56(%edx), %xmm2
	mov	$-56, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(40bytes):
	mov	$-40, %ebx
	movdqu	-40(%eax), %xmm1
	movdqu	-40(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(24bytes):
	mov	$-24, %ebx
	movdqu	-24(%eax), %xmm1
	movdqu	-24(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	mov	-8(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-8(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-8(%edx), %ecx
# endif
	jne	L(find_diff)

	mov	-4(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-4(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-4(%edx), %ecx
# endif
	/* Zero EAX for the equal case; mov keeps the flags for the jne.  */
	mov	$0, %eax
	jne	L(find_diff)
	RETURN
631
# ifndef USE_AS_WMEMCMP
/* memcmp-only handlers for 57/41/25, 58/42/26 and 59/43/27 remaining
   bytes.  EAX/EDX point just past the data, XMM0 is zero and EBX has
   been pushed.  Within a chain each label compares one 16-byte piece
   (pxor + ptest, CF clear means it differs, EBX = offset of the piece
   for L(less16bytes)) and falls through; the last label then finishes
   with two 4-byte words (to L(find_diff)) and the trailing 1, 2 or 3
   bytes (to L(end)).  */

/* 57 -> 41 -> 25, then two words and one byte.  */
	.p2align 4
L(57bytes):
	movdqu	-57(%eax), %xmm1
	movdqu	-57(%edx), %xmm2
	mov	$-57, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(41bytes):
	mov	$-41, %ebx
	movdqu	-41(%eax), %xmm1
	movdqu	-41(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(25bytes):
	mov	$-25, %ebx
	movdqu	-25(%eax), %xmm1
	movdqu	-25(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	mov	-9(%eax), %ecx
	mov	-9(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	mov	-5(%eax), %ecx
	mov	-5(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	movzbl	-1(%eax), %ecx
	cmp	-1(%edx), %cl
	mov	$0, %eax
	jne	L(end)
	RETURN

/* 58 -> 42 -> 26, then two words and two bytes.  */
	.p2align 4
L(58bytes):
	movdqu	-58(%eax), %xmm1
	movdqu	-58(%edx), %xmm2
	mov	$-58, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(42bytes):
	mov	$-42, %ebx
	movdqu	-42(%eax), %xmm1
	movdqu	-42(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(26bytes):
	mov	$-26, %ebx
	movdqu	-26(%eax), %xmm1
	movdqu	-26(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	mov	-10(%eax), %ecx
	mov	-10(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)

	mov	-6(%eax), %ecx
	mov	-6(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)

	/* Last two bytes: low byte (lower address) first, then the high
	   byte.  */
	movzwl	-2(%eax), %ecx
	movzwl	-2(%edx), %ebx
	cmp	%bl, %cl
	jne	L(end)
	cmp	%bh, %ch
	mov	$0, %eax
	jne	L(end)
	RETURN

/* 59 -> 43 -> 27, then two words and three bytes.  */
	.p2align 4
L(59bytes):
	movdqu	-59(%eax), %xmm1
	movdqu	-59(%edx), %xmm2
	mov	$-59, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(43bytes):
	mov	$-43, %ebx
	movdqu	-43(%eax), %xmm1
	movdqu	-43(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(27bytes):
	mov	$-27, %ebx
	movdqu	-27(%eax), %xmm1
	movdqu	-27(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	movl	-11(%eax), %ecx
	movl	-11(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	movl	-7(%eax), %ecx
	movl	-7(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	/* Bytes -3 and -2 as one word (low byte first), then byte -1 with
	   EAX as scratch.  */
	movzwl	-3(%eax), %ecx
	movzwl	-3(%edx), %ebx
	cmpb	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
	jne	L(end)
	movzbl	-1(%eax), %eax
	cmpb	-1(%edx), %al
	mov	$0, %eax
	jne	L(end)
	RETURN
# endif
/* Handlers for 60, 44 and 28 remaining bytes (both builds); each label
   falls through into the next.  EAX/EDX point just past the data, XMM0
   is zero and EBX has been pushed.  16-byte pieces are checked with
   pxor + ptest (CF clear means the piece differs, EBX = its offset for
   L(less16bytes)); the closing 12 bytes are compared as three words.  */
	.p2align 4
L(60bytes):
	movdqu	-60(%eax), %xmm1
	movdqu	-60(%edx), %xmm2
	mov	$-60, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(44bytes):
	mov	$-44, %ebx
	movdqu	-44(%eax), %xmm1
	movdqu	-44(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(28bytes):
	mov	$-28, %ebx
	movdqu	-28(%eax), %xmm1
	movdqu	-28(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	mov	-12(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-12(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-12(%edx), %ecx
# endif
	jne	L(find_diff)

	mov	-8(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-8(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-8(%edx), %ecx
# endif
	jne	L(find_diff)

	mov	-4(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-4(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-4(%edx), %ecx
# endif
	/* Zero EAX for the equal case; mov keeps the flags for the jne.  */
	mov	$0, %eax
	jne	L(find_diff)
	RETURN
804
# ifndef USE_AS_WMEMCMP
/* memcmp-only handlers for 61/45/29, 62/46/30 and 63/47/31 remaining
   bytes.  EAX/EDX point just past the data, XMM0 is zero and EBX has
   been pushed.  Within a chain each label compares one 16-byte piece
   (pxor + ptest, CF clear means it differs, EBX = offset of the piece
   for L(less16bytes)) and falls through; the last label then finishes
   with three 4-byte words (to L(find_diff)) and the trailing 1, 2 or 3
   bytes (to L(end)).  */

/* 61 -> 45 -> 29, then three words and one byte.  */
	.p2align 4
L(61bytes):
	movdqu	-61(%eax), %xmm1
	movdqu	-61(%edx), %xmm2
	mov	$-61, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(45bytes):
	mov	$-45, %ebx
	movdqu	-45(%eax), %xmm1
	movdqu	-45(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(29bytes):
	mov	$-29, %ebx
	movdqu	-29(%eax), %xmm1
	movdqu	-29(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	mov	-13(%eax), %ecx
	mov	-13(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)

	mov	-9(%eax), %ecx
	mov	-9(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)

	mov	-5(%eax), %ecx
	mov	-5(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	movzbl	-1(%eax), %ecx
	cmp	-1(%edx), %cl
	mov	$0, %eax
	jne	L(end)
	RETURN

/* 62 -> 46 -> 30, then three words and two bytes.  */
	.p2align 4
L(62bytes):
	movdqu	-62(%eax), %xmm1
	movdqu	-62(%edx), %xmm2
	mov	$-62, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(46bytes):
	mov	$-46, %ebx
	movdqu	-46(%eax), %xmm1
	movdqu	-46(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(30bytes):
	mov	$-30, %ebx
	movdqu	-30(%eax), %xmm1
	movdqu	-30(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	mov	-14(%eax), %ecx
	mov	-14(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	mov	-10(%eax), %ecx
	mov	-10(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	mov	-6(%eax), %ecx
	mov	-6(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	/* Last two bytes: low byte (lower address) first, then the high
	   byte.  */
	movzwl	-2(%eax), %ecx
	movzwl	-2(%edx), %ebx
	cmp	%bl, %cl
	jne	L(end)
	cmp	%bh, %ch
	mov	$0, %eax
	jne	L(end)
	RETURN

/* 63 -> 47 -> 31, then three words and three bytes.  */
	.p2align 4
L(63bytes):
	movdqu	-63(%eax), %xmm1
	movdqu	-63(%edx), %xmm2
	mov	$-63, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(47bytes):
	mov	$-47, %ebx
	movdqu	-47(%eax), %xmm1
	movdqu	-47(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(31bytes):
	mov	$-31, %ebx
	movdqu	-31(%eax), %xmm1
	movdqu	-31(%edx), %xmm2
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	movl	-15(%eax), %ecx
	movl	-15(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	movl	-11(%eax), %ecx
	movl	-11(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	movl	-7(%eax), %ecx
	movl	-7(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	/* Bytes -3 and -2 as one word (low byte first), then byte -1 with
	   EAX as scratch.  */
	movzwl	-3(%eax), %ecx
	movzwl	-3(%edx), %ebx
	cmpb	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
	jne	L(end)
	movzbl	-1(%eax), %eax
	cmpb	-1(%edx), %al
	mov	$0, %eax
	jne	L(end)
	RETURN
# endif
939
/* Handlers for 64, 48 and 32 remaining bytes (both builds); each label
   falls through into the next.  EAX/EDX point just past the data, XMM0
   is zero and EBX has been pushed.  16-byte pieces are checked with
   pxor + ptest (CF clear means the piece differs, EBX = its offset for
   L(less16bytes)); the closing 16 bytes are compared as four words.  */
	.p2align 4
L(64bytes):
	movdqu	-64(%eax), %xmm1
	movdqu	-64(%edx), %xmm2
	mov	$-64, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(48bytes):
	movdqu	-48(%eax), %xmm1
	movdqu	-48(%edx), %xmm2
	mov	$-48, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(32bytes):
	movdqu	-32(%eax), %xmm1
	movdqu	-32(%edx), %xmm2
	mov	$-32, %ebx
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	mov	-16(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-16(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-16(%edx), %ecx
# endif
	jne	L(find_diff)

	mov	-12(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-12(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-12(%edx), %ecx
# endif
	jne	L(find_diff)

	mov	-8(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-8(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-8(%edx), %ecx
# endif
	jne	L(find_diff)

	mov	-4(%eax), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-4(%edx), %ebx
	cmp	%ebx, %ecx
# else
	cmp	-4(%edx), %ecx
# endif
	/* Zero EAX for the equal case; mov keeps the flags for the jne.  */
	mov	$0, %eax
	jne	L(find_diff)
	RETURN
1000
/* A 16-byte piece was found to differ.  EBX holds the (negative) offset
   of that piece relative to EAX/EDX, so adding it makes both pointers
   address the start of the piece.  The piece is then searched one
   32-bit word at a time in address order, and the first unequal pair
   goes to L(find_diff).  EBX is reused as a data register in the memcmp
   build once the pointers have been adjusted.  The RETURN at the end is
   the fall-through for the case that all four words compare equal.  */
# ifndef USE_AS_WMEMCMP
	.p2align 4
L(less16bytes):
	add	%ebx, %eax
	add	%ebx, %edx

	mov	(%eax), %ecx
	mov	(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)

	mov	4(%eax), %ecx
	mov	4(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)

	mov	8(%eax), %ecx
	mov	8(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)

	mov	12(%eax), %ecx
	mov	12(%edx), %ebx
	cmp	%ebx, %ecx
	/* mov keeps the flags of the cmp for the jne.  */
	mov	$0, %eax
	jne	L(find_diff)
	RETURN
# else
	.p2align 4
L(less16bytes):
	add	%ebx, %eax
	add	%ebx, %edx

	mov	(%eax), %ecx
	cmp	(%edx), %ecx
	jne	L(find_diff)

	mov	4(%eax), %ecx
	cmp	4(%edx), %ecx
	jne	L(find_diff)

	mov	8(%eax), %ecx
	cmp	8(%edx), %ecx
	jne	L(find_diff)

	mov	12(%eax), %ecx
	cmp	12(%edx), %ecx

	/* mov keeps the flags of the cmp for the jne.  */
	mov	$0, %eax
	jne	L(find_diff)
	RETURN
# endif
1053
/* Turn a detected difference into the return value and leave.  EBX is
   still pushed on every path that gets here.

   memcmp build: ECX and EBX hold the unequal 32-bit words from the
   first and second block.  The bytes are examined from the lowest one
   upwards (the lowest byte of a word is the one at the lowest address
   on x86): the low byte first; then the low 16 bits, whose comparison
   is decided by the second byte once the low bytes are known to be
   equal; then the same again on the upper halves after shifting them
   down.  L(end) is also entered directly by the byte-wise tail code.
   The popl and mov at L(end) do not change flags, so "ja" acts on the
   deciding unsigned comparison: 1 if the first block is above,
   otherwise -1.

   wmemcmp build: the flags of the 32-bit cmp that found the difference
   are used as they are, with a signed test (jg).  */
	.p2align 4
L(find_diff):
# ifndef USE_AS_WMEMCMP
	cmpb	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
	jne	L(end)
	shr	$16,%ecx
	shr	$16,%ebx
	cmp	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
L(end):
	POP (%ebx)
	mov	$1, %eax
	ja	L(bigger)
	neg	%eax
L(bigger):
	ret
# else
	POP (%ebx)
	mov	$1, %eax
	jg	L(bigger)
	neg	%eax
	ret

	.p2align 4
L(bigger):
	ret
# endif
END (MEMCMP)
1085
/* Jump table used by BRANCH_TO_JMPTBL_ENTRY.  It has 65 four-byte
   entries, indexed by the number of bytes left to compare (0..64).
   Entry N leads to L(Nbytes).  In the wmemcmp build only the multiples
   of 4 have a handler; the other slots are filled with
   L(unreal_case).  */
	.section .rodata.sse4.2,"a",@progbits
	.p2align 2
	.type	L(table_64bytes), @object
# ifndef USE_AS_WMEMCMP
L(table_64bytes):
	.int	JMPTBL (L(0bytes), L(table_64bytes))
	.int	JMPTBL (L(1bytes), L(table_64bytes))
	.int	JMPTBL (L(2bytes), L(table_64bytes))
	.int	JMPTBL (L(3bytes), L(table_64bytes))
	.int	JMPTBL (L(4bytes), L(table_64bytes))
	.int	JMPTBL (L(5bytes), L(table_64bytes))
	.int	JMPTBL (L(6bytes), L(table_64bytes))
	.int	JMPTBL (L(7bytes), L(table_64bytes))
	.int	JMPTBL (L(8bytes), L(table_64bytes))
	.int	JMPTBL (L(9bytes), L(table_64bytes))
	.int	JMPTBL (L(10bytes), L(table_64bytes))
	.int	JMPTBL (L(11bytes), L(table_64bytes))
	.int	JMPTBL (L(12bytes), L(table_64bytes))
	.int	JMPTBL (L(13bytes), L(table_64bytes))
	.int	JMPTBL (L(14bytes), L(table_64bytes))
	.int	JMPTBL (L(15bytes), L(table_64bytes))
	.int	JMPTBL (L(16bytes), L(table_64bytes))
	.int	JMPTBL (L(17bytes), L(table_64bytes))
	.int	JMPTBL (L(18bytes), L(table_64bytes))
	.int	JMPTBL (L(19bytes), L(table_64bytes))
	.int	JMPTBL (L(20bytes), L(table_64bytes))
	.int	JMPTBL (L(21bytes), L(table_64bytes))
	.int	JMPTBL (L(22bytes), L(table_64bytes))
	.int	JMPTBL (L(23bytes), L(table_64bytes))
	.int	JMPTBL (L(24bytes), L(table_64bytes))
	.int	JMPTBL (L(25bytes), L(table_64bytes))
	.int	JMPTBL (L(26bytes), L(table_64bytes))
	.int	JMPTBL (L(27bytes), L(table_64bytes))
	.int	JMPTBL (L(28bytes), L(table_64bytes))
	.int	JMPTBL (L(29bytes), L(table_64bytes))
	.int	JMPTBL (L(30bytes), L(table_64bytes))
	.int	JMPTBL (L(31bytes), L(table_64bytes))
	.int	JMPTBL (L(32bytes), L(table_64bytes))
	.int	JMPTBL (L(33bytes), L(table_64bytes))
	.int	JMPTBL (L(34bytes), L(table_64bytes))
	.int	JMPTBL (L(35bytes), L(table_64bytes))
	.int	JMPTBL (L(36bytes), L(table_64bytes))
	.int	JMPTBL (L(37bytes), L(table_64bytes))
	.int	JMPTBL (L(38bytes), L(table_64bytes))
	.int	JMPTBL (L(39bytes), L(table_64bytes))
	.int	JMPTBL (L(40bytes), L(table_64bytes))
	.int	JMPTBL (L(41bytes), L(table_64bytes))
	.int	JMPTBL (L(42bytes), L(table_64bytes))
	.int	JMPTBL (L(43bytes), L(table_64bytes))
	.int	JMPTBL (L(44bytes), L(table_64bytes))
	.int	JMPTBL (L(45bytes), L(table_64bytes))
	.int	JMPTBL (L(46bytes), L(table_64bytes))
	.int	JMPTBL (L(47bytes), L(table_64bytes))
	.int	JMPTBL (L(48bytes), L(table_64bytes))
	.int	JMPTBL (L(49bytes), L(table_64bytes))
	.int	JMPTBL (L(50bytes), L(table_64bytes))
	.int	JMPTBL (L(51bytes), L(table_64bytes))
	.int	JMPTBL (L(52bytes), L(table_64bytes))
	.int	JMPTBL (L(53bytes), L(table_64bytes))
	.int	JMPTBL (L(54bytes), L(table_64bytes))
	.int	JMPTBL (L(55bytes), L(table_64bytes))
	.int	JMPTBL (L(56bytes), L(table_64bytes))
	.int	JMPTBL (L(57bytes), L(table_64bytes))
	.int	JMPTBL (L(58bytes), L(table_64bytes))
	.int	JMPTBL (L(59bytes), L(table_64bytes))
	.int	JMPTBL (L(60bytes), L(table_64bytes))
	.int	JMPTBL (L(61bytes), L(table_64bytes))
	.int	JMPTBL (L(62bytes), L(table_64bytes))
	.int	JMPTBL (L(63bytes), L(table_64bytes))
	.int	JMPTBL (L(64bytes), L(table_64bytes))
# else
L(table_64bytes):
	.int	JMPTBL (L(0bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(4bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(8bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(12bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(16bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(20bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(24bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(28bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(32bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(36bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(40bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(44bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(48bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(52bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(56bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(60bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(64bytes), L(table_64bytes))
# endif
#endif
1225

/* source code of glibc/sysdeps/i386/i686/multiarch/memcmp-sse4.S */