1/* strcmp with SSSE3
2 Copyright (C) 2010-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#if IS_IN (libc)
20
21#include <sysdep.h>
22#include "asm-syntax.h"
23
/* CFI annotations paired with a 4-byte pushl/popl so the unwinder can
   track the CFA adjustment and the saved register.  */
#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)

/* Per-variant configuration.  STR1/STR2/CNT/LOCALE are the %esp-relative
   argument offsets *after* the prologue pushes of that variant; RETURN
   undoes those pushes before `ret'.  FLAGS records the shift/swap state
   of the main loop; REM is the count of bytes still to compare (strn*
   variants only).  */
#ifdef USE_AS_STRNCMP
# ifndef STRCMP
#  define STRCMP __strncmp_ssse3
# endif
# define STR1 8
# define STR2 STR1+4
# define CNT STR2+4
# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM)
/* At the point this is expanded, %ecx holds the alignment offset, so
   16-%ecx bytes of the first aligned block were just compared; take
   them off REM, or bail out via L(more8byteseq) if the length limit
   expires inside them.  */
# define UPDATE_STRNCMP_COUNTER \
	/* calculate left number to compare */ \
	mov $16, %esi; \
	sub %ecx, %esi; \
	cmp %esi, REM; \
	jbe L(more8byteseq); \
	sub %esi, REM
# define FLAGS %ebx
# define REM %ebp
#elif defined USE_AS_STRCASECMP_L
# include "locale-defines.h"
# ifndef STRCMP
#  define STRCMP __strcasecmp_l_ssse3
# endif
# ifdef PIC
#  define STR1 8
# else
#  define STR1 4
# endif
# define STR2 STR1+4
# define LOCALE 12 /* Loaded before the adjustment. */
# ifdef PIC
#  define RETURN POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx)
# else
#  define RETURN ret; .p2align 4
# endif
# define UPDATE_STRNCMP_COUNTER
/* No free callee-saved register in the case-insensitive variants:
   FLAGS lives in the stack slot pushed at L(crosspage).  */
# define FLAGS (%esp)
# define NONASCII __strcasecmp_nonascii
#elif defined USE_AS_STRNCASECMP_L
# include "locale-defines.h"
# ifndef STRCMP
#  define STRCMP __strncasecmp_l_ssse3
# endif
# ifdef PIC
#  define STR1 12
# else
#  define STR1 8
# endif
# define STR2 STR1+4
# define CNT STR2+4
# define LOCALE 16 /* Loaded before the adjustment. */
# ifdef PIC
#  define RETURN POP (REM); POP (%ebx); ret; \
	.p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM)
# else
#  define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM)
# endif
# define UPDATE_STRNCMP_COUNTER \
	/* calculate left number to compare */ \
	mov $16, %esi; \
	sub %ecx, %esi; \
	cmp %esi, REM; \
	jbe L(more8byteseq); \
	sub %esi, REM
# define FLAGS (%esp)
# define REM %ebp
# define NONASCII __strncasecmp_nonascii
#else
# ifndef STRCMP
#  define STRCMP __strcmp_ssse3
# endif
# define STR1 4
# define STR2 STR1+4
# define RETURN ret; .p2align 4
# define UPDATE_STRNCMP_COUNTER
# define FLAGS %ebx
#endif
111
	.section .text.ssse3,"ax",@progbits

#ifdef USE_AS_STRCASECMP_L
/* int __strcasecmp_ssse3 (const char *s1, const char *s2)
   Entry point that resolves the current thread's locale itself, then
   dispatches: locales whose ctype data flags non-ASCII case mapping go
   to __strcasecmp_nonascii; plain-ASCII locales fall through to the
   L(ascii) fast path inside STRCMP below.  */
ENTRY (__strcasecmp_ssse3)
# ifdef PIC
	PUSH	(%ebx)
	LOAD_PIC_REG(bx)
	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
	movl	%gs:(%eax), %eax	/* %eax = this thread's locale_t */
# else
	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
# endif
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax		/* LC_CTYPE data is the first slot */
# endif
	/* Bit 0 of _NL_CTYPE_NONASCII_CASE is set when case conversion
	   is not plain ASCII.  */
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
# ifdef PIC
	je	L(ascii)
	POP	(%ebx)			/* restore %ebx before tail call */
	jmp	__strcasecmp_nonascii
# else
	jne	__strcasecmp_nonascii
	jmp	L(ascii)
# endif
END (__strcasecmp_ssse3)
#endif
140
#ifdef USE_AS_STRNCASECMP_L
/* int __strncasecmp_ssse3 (const char *s1, const char *s2, size_t n)
   Same locale dispatch as __strcasecmp_ssse3 above: non-ASCII ctype
   data goes to __strncasecmp_nonascii, ASCII locales fall through to
   L(ascii) inside STRCMP.  */
ENTRY (__strncasecmp_ssse3)
# ifdef PIC
	PUSH	(%ebx)
	LOAD_PIC_REG(bx)
	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
	movl	%gs:(%eax), %eax	/* %eax = this thread's locale_t */
# else
	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
# endif
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax		/* LC_CTYPE data is the first slot */
# endif
	/* Bit 0 of _NL_CTYPE_NONASCII_CASE is set when case conversion
	   is not plain ASCII.  */
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
# ifdef PIC
	je	L(ascii)
	POP	(%ebx)			/* restore %ebx before tail call */
	jmp	__strncasecmp_nonascii
# else
	jne	__strncasecmp_nonascii
	jmp	L(ascii)
# endif
END (__strncasecmp_ssse3)
#endif
167
ENTRY (STRCMP)
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	/* The *_l variants receive an explicit locale_t argument; check
	   its ctype data and punt to the generic non-ASCII routine when
	   case mapping is not plain ASCII.  */
	movl	LOCALE(%esp), %eax
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax
# endif
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
	jne	NONASCII

# ifdef PIC
	PUSH	(%ebx)
	LOAD_PIC_REG(bx)
# endif
L(ascii):
	/* Constants for the SIMD tolower: a byte c is an ASCII upper-case
	   letter iff 0x40 < c < 0x5b; OR-ing 0x20 into such bytes lower-cases
	   them.  */
	.section .rodata.cst16,"aM",@progbits,16
	.align 16
.Lbelowupper:
	.quad	0x4040404040404040
	.quad	0x4040404040404040
.Ltopupper:
	.quad	0x5b5b5b5b5b5b5b5b
	.quad	0x5b5b5b5b5b5b5b5b
.Ltouppermask:
	.quad	0x2020202020202020
	.quad	0x2020202020202020
	.previous

# ifdef PIC
#  define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
#  define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
#  define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
# else
#  define UCLOW_reg .Lbelowupper
#  define UCHIGH_reg .Ltopupper
#  define LCQWORD_reg .Ltouppermask
# endif
#endif

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	PUSH	(REM)
#endif

	movl	STR1(%esp), %edx	/* %edx = s1 */
	movl	STR2(%esp), %eax	/* %eax = s2 */
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	movl	CNT(%esp), REM
	cmp	$16, REM
	jb	L(less16bytes_sncmp)	/* short count: byte loop elsewhere */
#elif !defined USE_AS_STRCASECMP_L
	/* Plain strcmp: compare the first 8 bytes one at a time; most
	   calls differ (or hit the terminator) very early.  */
	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	add	$8, %edx
	add	$8, %eax
#endif
	/* If either pointer is within 16 bytes of a page end, an
	   unaligned 16-byte load could fault; take the careful path.  */
	movl	%edx, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(crosspage)
	mov	%eax, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(crosspage)
	/* Compare the next 16 bytes of both strings at once.  */
	pxor	%xmm0, %xmm0
	movlpd	(%eax), %xmm1
	movlpd	(%edx), %xmm2
	movhpd	8(%eax), %xmm1
	movhpd	8(%edx), %xmm2
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
/* Lower-case every ASCII upper-case byte of reg1 and reg2 in place.
   For each byte: (UCLOW < b) AND (b < UCHIGH) selects 'A'..'Z'; OR in
   0x20 where selected.  Clobbers %xmm5, %xmm6, %xmm7.  */
# define TOLOWER(reg1, reg2) \
	movdqa	reg1, %xmm5; \
	movdqa	reg2, %xmm7; \
	movdqa	UCHIGH_reg, %xmm6; \
	pcmpgtb	UCLOW_reg, %xmm5; \
	pcmpgtb	UCLOW_reg, %xmm7; \
	pcmpgtb	reg1, %xmm6; \
	pand	%xmm6, %xmm5; \
	movdqa	UCHIGH_reg, %xmm6; \
	pcmpgtb	reg2, %xmm6; \
	pand	%xmm6, %xmm7; \
	pand	LCQWORD_reg, %xmm5; \
	por	%xmm5, reg1; \
	pand	LCQWORD_reg, %xmm7; \
	por	%xmm7, reg2
	TOLOWER (%xmm1, %xmm2)
#else
# define TOLOWER(reg1, reg2)
#endif
	pcmpeqb	%xmm1, %xmm0		/* %xmm0: 0xff where s2 byte is NUL */
	pcmpeqb	%xmm2, %xmm1		/* %xmm1: 0xff where bytes are equal */
	psubb	%xmm0, %xmm1		/* knock out positions at/after NUL */
	pmovmskb %xmm1, %ecx
	sub	$0xffff, %ecx		/* zero iff 16 equal, non-NUL bytes */
	jnz	L(less16bytes)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(eq)			/* count exhausted: strings equal */
#endif
	add	$16, %eax
	add	$16, %edx

L(crosspage):

#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	PUSH	(FLAGS)
#endif
	PUSH	(%edi)
	PUSH	(%esi)
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	pushl	$0			/* stack slot backing FLAGS */
	cfi_adjust_cfa_offset (4)
#endif
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cfi_remember_state
#endif

	/* Round both pointers down to 16-byte alignment; %ecx/%edi keep
	   the within-block offsets of s2/s1 respectively.  */
	movl	%edx, %edi
	movl	%eax, %ecx
	and	$0xf, %ecx
	and	$0xf, %edi
	xor	%ecx, %eax
	xor	%edi, %edx
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	xor	FLAGS, FLAGS
#endif
	cmp	%edi, %ecx
	je	L(ashr_0)		/* same alignment: no byte shifting */
	ja	L(bigger)
	/* Make %eax the more-aligned string; bit 5 of FLAGS records the
	   swap so the final sign can be corrected.  */
	orl	$0x20, FLAGS
	xchg	%edx, %eax
	xchg	%ecx, %edi
L(bigger):
	/* %edi = 15 + (s1 offset) - (s2 offset); dispatch to the ashr_N
	   case handling relative offset N-1 (see per-case comments).  */
	lea	15(%edi), %edi
	sub	%ecx, %edi
	cmp	$8, %edi
	jle	L(ashr_less_8)
	cmp	$14, %edi
	je	L(ashr_15)
	cmp	$13, %edi
	je	L(ashr_14)
	cmp	$12, %edi
	je	L(ashr_13)
	cmp	$11, %edi
	je	L(ashr_12)
	cmp	$10, %edi
	je	L(ashr_11)
	cmp	$9, %edi
	je	L(ashr_10)
L(ashr_less_8):
	/* Reached via jle above: flags still reflect `cmp $8, %edi',
	   so this je means %edi == 8.  */
	je	L(ashr_9)
	cmp	$7, %edi
	je	L(ashr_8)
	cmp	$6, %edi
	je	L(ashr_7)
	cmp	$5, %edi
	je	L(ashr_6)
	cmp	$4, %edi
	je	L(ashr_5)
	cmp	$3, %edi
	je	L(ashr_4)
	cmp	$2, %edi
	je	L(ashr_3)
	cmp	$1, %edi
	je	L(ashr_2)
	cmp	$0, %edi
	je	L(ashr_1)
383/*
384 * The following cases will be handled by ashr_0
385 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
386 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
387 */
	.p2align 4
/* Both strings have the same 16-byte alignment: the loop needs no
   palignr merging at all.  On entry %ecx holds the common offset into
   the first aligned block.  */
L(ashr_0):
	mov	$0xffff, %esi
	movdqa	(%eax), %xmm1
	pxor	%xmm0, %xmm0
	pcmpeqb	%xmm1, %xmm0		/* %xmm0: 0xff where byte is NUL */
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	movdqa	(%edx), %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm2, %xmm1
#else
	pcmpeqb	(%edx), %xmm1		/* %xmm1: 0xff where bytes match */
#endif
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edi
	shr	%cl, %esi		/* discard bytes before the strings */
	shr	%cl, %edi
	sub	%edi, %esi		/* non-zero: mismatch or NUL found */
	mov	%ecx, %edi
	jne	L(less32bytes)
	UPDATE_STRNCMP_COUNTER
	movl	$0x10, FLAGS
	mov	$0x10, %ecx		/* %ecx: running block offset */
	pxor	%xmm0, %xmm0
	.p2align 4
L(loop_ashr_0):
	movdqa	(%eax, %ecx), %xmm1
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	movdqa	(%edx, %ecx), %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
#else
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	(%edx, %ecx), %xmm1
#endif
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi		/* zero iff 16 equal non-NUL bytes */
	jnz	L(exit)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)		/* length limit reached */
#endif
	add	$16, %ecx
	jmp	L(loop_ashr_0)
436
/*
 * The following cases will be handled by ashr_1
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(15)		   n -15		0(15 +(n-15) - n)	 ashr_1
 *
 * The less-aligned string (%edx) trails the other by 1 byte within its
 * aligned block, so each iteration merges two consecutive %edx blocks
 * with `palignr $1' before comparing against the aligned %eax block.
 */
	.p2align 4
L(ashr_1):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* NUL positions in the %eax block */
	pslldq	$15, %xmm2		/* line %edx block up for compare */
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi		/* discard bytes before the strings */
	shr	%cl, %edi
	sub	%edi, %esi		/* non-zero: mismatch or NUL found */
	lea	-15(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3		/* %xmm3: previous %edx block */
	pxor	%xmm0, %xmm0
	mov	$16, %ecx		/* %ecx: running block offset */
	orl	$1, FLAGS		/* record shift amount for the tail */
	/* %edi tracks the page offset of %edx+1; once a 16-byte step would
	   make it positive we are about to touch a new page and must take
	   the careful L(nibble_ashr_1) path first.  */
	lea	1(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_1):
	add	$16, %edi
	jg	L(nibble_ashr_1)

L(gobble_ashr_1):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep raw block for next round */

	palignr	$1, %xmm3, %xmm2	/* merge prev/cur %edx blocks */
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi		/* zero iff 16 equal non-NUL bytes */
	jnz	L(exit)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)		/* length limit reached */
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	/* Second, unrolled copy of the iteration.  */
	add	$16, %edi
	jg	L(nibble_ashr_1)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$1, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_1)

	.p2align 4
L(nibble_ashr_1):
	/* About to cross a page on the %edx side: check the not-yet-used
	   tail bytes (positions 1..15) of the previous block for NUL
	   before loading from the new page.  */
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfffe, %esi
	jnz	L(ashr_1_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$15, REM
	jbe	L(ashr_1_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi		/* rearm the page-boundary counter */
	jmp	L(gobble_ashr_1)

	.p2align 4
L(ashr_1_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$1, %xmm0
	psrldq	$1, %xmm3
	jmp	L(aftertail)
545
546/*
547 * The following cases will be handled by ashr_2
548 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
549 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
550 */
551 .p2align 4
552L(ashr_2):
553 mov $0xffff, %esi
554 pxor %xmm0, %xmm0
555 movdqa (%edx), %xmm2
556 movdqa (%eax), %xmm1
557 pcmpeqb %xmm1, %xmm0
558 pslldq $14, %xmm2
559 TOLOWER (%xmm1, %xmm2)
560 pcmpeqb %xmm1, %xmm2
561 psubb %xmm0, %xmm2
562 pmovmskb %xmm2, %edi
563 shr %cl, %esi
564 shr %cl, %edi
565 sub %edi, %esi
566 lea -14(%ecx), %edi
567 jnz L(less32bytes)
568
569 UPDATE_STRNCMP_COUNTER
570
571 movdqa (%edx), %xmm3
572 pxor %xmm0, %xmm0
573 mov $16, %ecx
574 orl $2, FLAGS
575 lea 2(%edx), %edi
576 and $0xfff, %edi
577 sub $0x1000, %edi
578
579 .p2align 4
580L(loop_ashr_2):
581 add $16, %edi
582 jg L(nibble_ashr_2)
583
584L(gobble_ashr_2):
585 movdqa (%eax, %ecx), %xmm1
586 movdqa (%edx, %ecx), %xmm2
587 movdqa %xmm2, %xmm4
588
589 palignr $2, %xmm3, %xmm2
590 TOLOWER (%xmm1, %xmm2)
591
592 pcmpeqb %xmm1, %xmm0
593 pcmpeqb %xmm2, %xmm1
594 psubb %xmm0, %xmm1
595 pmovmskb %xmm1, %esi
596 sub $0xffff, %esi
597 jnz L(exit)
598
599#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
600 cmp $16, REM
601 lea -16(REM), REM
602 jbe L(more8byteseq)
603#endif
604 add $16, %ecx
605 movdqa %xmm4, %xmm3
606
607 add $16, %edi
608 jg L(nibble_ashr_2)
609
610 movdqa (%eax, %ecx), %xmm1
611 movdqa (%edx, %ecx), %xmm2
612 movdqa %xmm2, %xmm4
613
614 palignr $2, %xmm3, %xmm2
615 TOLOWER (%xmm1, %xmm2)
616
617 pcmpeqb %xmm1, %xmm0
618 pcmpeqb %xmm2, %xmm1
619 psubb %xmm0, %xmm1
620 pmovmskb %xmm1, %esi
621 sub $0xffff, %esi
622 jnz L(exit)
623
624#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
625 cmp $16, REM
626 lea -16(REM), REM
627 jbe L(more8byteseq)
628#endif
629 add $16, %ecx
630 movdqa %xmm4, %xmm3
631 jmp L(loop_ashr_2)
632
633 .p2align 4
634L(nibble_ashr_2):
635 pcmpeqb %xmm3, %xmm0
636 pmovmskb %xmm0, %esi
637 test $0xfffc, %esi
638 jnz L(ashr_2_exittail)
639
640#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
641 cmp $14, REM
642 jbe L(ashr_2_exittail)
643#endif
644
645 pxor %xmm0, %xmm0
646 sub $0x1000, %edi
647 jmp L(gobble_ashr_2)
648
649 .p2align 4
650L(ashr_2_exittail):
651 movdqa (%eax, %ecx), %xmm1
652 psrldq $2, %xmm0
653 psrldq $2, %xmm3
654 jmp L(aftertail)
655
656/*
657 * The following cases will be handled by ashr_3
658 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
659 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
660 */
661 .p2align 4
662L(ashr_3):
663 mov $0xffff, %esi
664 pxor %xmm0, %xmm0
665 movdqa (%edx), %xmm2
666 movdqa (%eax), %xmm1
667 pcmpeqb %xmm1, %xmm0
668 pslldq $13, %xmm2
669 TOLOWER (%xmm1, %xmm2)
670 pcmpeqb %xmm1, %xmm2
671 psubb %xmm0, %xmm2
672 pmovmskb %xmm2, %edi
673 shr %cl, %esi
674 shr %cl, %edi
675 sub %edi, %esi
676 lea -13(%ecx), %edi
677 jnz L(less32bytes)
678
679 UPDATE_STRNCMP_COUNTER
680
681 movdqa (%edx), %xmm3
682 pxor %xmm0, %xmm0
683 mov $16, %ecx
684 orl $3, FLAGS
685 lea 3(%edx), %edi
686 and $0xfff, %edi
687 sub $0x1000, %edi
688
689 .p2align 4
690L(loop_ashr_3):
691 add $16, %edi
692 jg L(nibble_ashr_3)
693
694L(gobble_ashr_3):
695 movdqa (%eax, %ecx), %xmm1
696 movdqa (%edx, %ecx), %xmm2
697 movdqa %xmm2, %xmm4
698
699 palignr $3, %xmm3, %xmm2
700 TOLOWER (%xmm1, %xmm2)
701
702 pcmpeqb %xmm1, %xmm0
703 pcmpeqb %xmm2, %xmm1
704 psubb %xmm0, %xmm1
705 pmovmskb %xmm1, %esi
706 sub $0xffff, %esi
707 jnz L(exit)
708
709#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
710 cmp $16, REM
711 lea -16(REM), REM
712 jbe L(more8byteseq)
713#endif
714 add $16, %ecx
715 movdqa %xmm4, %xmm3
716
717 add $16, %edi
718 jg L(nibble_ashr_3)
719
720 movdqa (%eax, %ecx), %xmm1
721 movdqa (%edx, %ecx), %xmm2
722 movdqa %xmm2, %xmm4
723
724 palignr $3, %xmm3, %xmm2
725 TOLOWER (%xmm1, %xmm2)
726
727 pcmpeqb %xmm1, %xmm0
728 pcmpeqb %xmm2, %xmm1
729 psubb %xmm0, %xmm1
730 pmovmskb %xmm1, %esi
731 sub $0xffff, %esi
732 jnz L(exit)
733
734#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
735 cmp $16, REM
736 lea -16(REM), REM
737 jbe L(more8byteseq)
738#endif
739 add $16, %ecx
740 movdqa %xmm4, %xmm3
741 jmp L(loop_ashr_3)
742
743 .p2align 4
744L(nibble_ashr_3):
745 pcmpeqb %xmm3, %xmm0
746 pmovmskb %xmm0, %esi
747 test $0xfff8, %esi
748 jnz L(ashr_3_exittail)
749
750#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
751 cmp $13, REM
752 jbe L(ashr_3_exittail)
753#endif
754 pxor %xmm0, %xmm0
755 sub $0x1000, %edi
756 jmp L(gobble_ashr_3)
757
758 .p2align 4
759L(ashr_3_exittail):
760 movdqa (%eax, %ecx), %xmm1
761 psrldq $3, %xmm0
762 psrldq $3, %xmm3
763 jmp L(aftertail)
764
765/*
766 * The following cases will be handled by ashr_4
767 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
768 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
769 */
770 .p2align 4
771L(ashr_4):
772 mov $0xffff, %esi
773 pxor %xmm0, %xmm0
774 movdqa (%edx), %xmm2
775 movdqa (%eax), %xmm1
776 pcmpeqb %xmm1, %xmm0
777 pslldq $12, %xmm2
778 TOLOWER (%xmm1, %xmm2)
779 pcmpeqb %xmm1, %xmm2
780 psubb %xmm0, %xmm2
781 pmovmskb %xmm2, %edi
782 shr %cl, %esi
783 shr %cl, %edi
784 sub %edi, %esi
785 lea -12(%ecx), %edi
786 jnz L(less32bytes)
787
788 UPDATE_STRNCMP_COUNTER
789
790 movdqa (%edx), %xmm3
791 pxor %xmm0, %xmm0
792 mov $16, %ecx
793 orl $4, FLAGS
794 lea 4(%edx), %edi
795 and $0xfff, %edi
796 sub $0x1000, %edi
797
798 .p2align 4
799L(loop_ashr_4):
800 add $16, %edi
801 jg L(nibble_ashr_4)
802
803L(gobble_ashr_4):
804 movdqa (%eax, %ecx), %xmm1
805 movdqa (%edx, %ecx), %xmm2
806 movdqa %xmm2, %xmm4
807
808 palignr $4, %xmm3, %xmm2
809 TOLOWER (%xmm1, %xmm2)
810
811 pcmpeqb %xmm1, %xmm0
812 pcmpeqb %xmm2, %xmm1
813 psubb %xmm0, %xmm1
814 pmovmskb %xmm1, %esi
815 sub $0xffff, %esi
816 jnz L(exit)
817
818#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
819 cmp $16, REM
820 lea -16(REM), REM
821 jbe L(more8byteseq)
822#endif
823
824 add $16, %ecx
825 movdqa %xmm4, %xmm3
826
827 add $16, %edi
828 jg L(nibble_ashr_4)
829
830 movdqa (%eax, %ecx), %xmm1
831 movdqa (%edx, %ecx), %xmm2
832 movdqa %xmm2, %xmm4
833
834 palignr $4, %xmm3, %xmm2
835 TOLOWER (%xmm1, %xmm2)
836
837 pcmpeqb %xmm1, %xmm0
838 pcmpeqb %xmm2, %xmm1
839 psubb %xmm0, %xmm1
840 pmovmskb %xmm1, %esi
841 sub $0xffff, %esi
842 jnz L(exit)
843
844#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
845 cmp $16, REM
846 lea -16(REM), REM
847 jbe L(more8byteseq)
848#endif
849
850 add $16, %ecx
851 movdqa %xmm4, %xmm3
852 jmp L(loop_ashr_4)
853
854 .p2align 4
855L(nibble_ashr_4):
856 pcmpeqb %xmm3, %xmm0
857 pmovmskb %xmm0, %esi
858 test $0xfff0, %esi
859 jnz L(ashr_4_exittail)
860
861#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
862 cmp $12, REM
863 jbe L(ashr_4_exittail)
864#endif
865
866 pxor %xmm0, %xmm0
867 sub $0x1000, %edi
868 jmp L(gobble_ashr_4)
869
870 .p2align 4
871L(ashr_4_exittail):
872 movdqa (%eax, %ecx), %xmm1
873 psrldq $4, %xmm0
874 psrldq $4, %xmm3
875 jmp L(aftertail)
876
877/*
878 * The following cases will be handled by ashr_5
879 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
880 * n(11~15) n -11 4(15 +(n-11) - n) ashr_5
881 */
882 .p2align 4
883L(ashr_5):
884 mov $0xffff, %esi
885 pxor %xmm0, %xmm0
886 movdqa (%edx), %xmm2
887 movdqa (%eax), %xmm1
888 pcmpeqb %xmm1, %xmm0
889 pslldq $11, %xmm2
890 TOLOWER (%xmm1, %xmm2)
891 pcmpeqb %xmm1, %xmm2
892 psubb %xmm0, %xmm2
893 pmovmskb %xmm2, %edi
894 shr %cl, %esi
895 shr %cl, %edi
896 sub %edi, %esi
897 lea -11(%ecx), %edi
898 jnz L(less32bytes)
899
900 UPDATE_STRNCMP_COUNTER
901
902 movdqa (%edx), %xmm3
903 pxor %xmm0, %xmm0
904 mov $16, %ecx
905 orl $5, FLAGS
906 lea 5(%edx), %edi
907 and $0xfff, %edi
908 sub $0x1000, %edi
909
910 .p2align 4
911L(loop_ashr_5):
912 add $16, %edi
913 jg L(nibble_ashr_5)
914
915L(gobble_ashr_5):
916 movdqa (%eax, %ecx), %xmm1
917 movdqa (%edx, %ecx), %xmm2
918 movdqa %xmm2, %xmm4
919
920 palignr $5, %xmm3, %xmm2
921 TOLOWER (%xmm1, %xmm2)
922
923 pcmpeqb %xmm1, %xmm0
924 pcmpeqb %xmm2, %xmm1
925 psubb %xmm0, %xmm1
926 pmovmskb %xmm1, %esi
927 sub $0xffff, %esi
928 jnz L(exit)
929
930#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
931 cmp $16, REM
932 lea -16(REM), REM
933 jbe L(more8byteseq)
934#endif
935 add $16, %ecx
936 movdqa %xmm4, %xmm3
937
938 add $16, %edi
939 jg L(nibble_ashr_5)
940
941 movdqa (%eax, %ecx), %xmm1
942 movdqa (%edx, %ecx), %xmm2
943 movdqa %xmm2, %xmm4
944
945 palignr $5, %xmm3, %xmm2
946 TOLOWER (%xmm1, %xmm2)
947
948 pcmpeqb %xmm1, %xmm0
949 pcmpeqb %xmm2, %xmm1
950 psubb %xmm0, %xmm1
951 pmovmskb %xmm1, %esi
952 sub $0xffff, %esi
953 jnz L(exit)
954
955#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
956 cmp $16, REM
957 lea -16(REM), REM
958 jbe L(more8byteseq)
959#endif
960 add $16, %ecx
961 movdqa %xmm4, %xmm3
962 jmp L(loop_ashr_5)
963
964 .p2align 4
965L(nibble_ashr_5):
966 pcmpeqb %xmm3, %xmm0
967 pmovmskb %xmm0, %esi
968 test $0xffe0, %esi
969 jnz L(ashr_5_exittail)
970
971#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
972 cmp $11, REM
973 jbe L(ashr_5_exittail)
974#endif
975 pxor %xmm0, %xmm0
976 sub $0x1000, %edi
977 jmp L(gobble_ashr_5)
978
979 .p2align 4
980L(ashr_5_exittail):
981 movdqa (%eax, %ecx), %xmm1
982 psrldq $5, %xmm0
983 psrldq $5, %xmm3
984 jmp L(aftertail)
985
986/*
987 * The following cases will be handled by ashr_6
988 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
989 * n(10~15) n -10 5(15 +(n-10) - n) ashr_6
990 */
991
992 .p2align 4
993L(ashr_6):
994 mov $0xffff, %esi
995 pxor %xmm0, %xmm0
996 movdqa (%edx), %xmm2
997 movdqa (%eax), %xmm1
998 pcmpeqb %xmm1, %xmm0
999 pslldq $10, %xmm2
1000 TOLOWER (%xmm1, %xmm2)
1001 pcmpeqb %xmm1, %xmm2
1002 psubb %xmm0, %xmm2
1003 pmovmskb %xmm2, %edi
1004 shr %cl, %esi
1005 shr %cl, %edi
1006 sub %edi, %esi
1007 lea -10(%ecx), %edi
1008 jnz L(less32bytes)
1009
1010 UPDATE_STRNCMP_COUNTER
1011
1012 movdqa (%edx), %xmm3
1013 pxor %xmm0, %xmm0
1014 mov $16, %ecx
1015 orl $6, FLAGS
1016 lea 6(%edx), %edi
1017 and $0xfff, %edi
1018 sub $0x1000, %edi
1019
1020 .p2align 4
1021L(loop_ashr_6):
1022 add $16, %edi
1023 jg L(nibble_ashr_6)
1024
1025L(gobble_ashr_6):
1026 movdqa (%eax, %ecx), %xmm1
1027 movdqa (%edx, %ecx), %xmm2
1028 movdqa %xmm2, %xmm4
1029
1030 palignr $6, %xmm3, %xmm2
1031 TOLOWER (%xmm1, %xmm2)
1032
1033 pcmpeqb %xmm1, %xmm0
1034 pcmpeqb %xmm2, %xmm1
1035 psubb %xmm0, %xmm1
1036 pmovmskb %xmm1, %esi
1037 sub $0xffff, %esi
1038 jnz L(exit)
1039
1040#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1041 cmp $16, REM
1042 lea -16(REM), REM
1043 jbe L(more8byteseq)
1044#endif
1045
1046 add $16, %ecx
1047 movdqa %xmm4, %xmm3
1048
1049 add $16, %edi
1050 jg L(nibble_ashr_6)
1051
1052 movdqa (%eax, %ecx), %xmm1
1053 movdqa (%edx, %ecx), %xmm2
1054 movdqa %xmm2, %xmm4
1055
1056 palignr $6, %xmm3, %xmm2
1057 TOLOWER (%xmm1, %xmm2)
1058
1059 pcmpeqb %xmm1, %xmm0
1060 pcmpeqb %xmm2, %xmm1
1061 psubb %xmm0, %xmm1
1062 pmovmskb %xmm1, %esi
1063 sub $0xffff, %esi
1064 jnz L(exit)
1065#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1066 cmp $16, REM
1067 lea -16(REM), REM
1068 jbe L(more8byteseq)
1069#endif
1070
1071 add $16, %ecx
1072 movdqa %xmm4, %xmm3
1073 jmp L(loop_ashr_6)
1074
1075 .p2align 4
1076L(nibble_ashr_6):
1077 pcmpeqb %xmm3, %xmm0
1078 pmovmskb %xmm0, %esi
1079 test $0xffc0, %esi
1080 jnz L(ashr_6_exittail)
1081
1082#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1083 cmp $10, REM
1084 jbe L(ashr_6_exittail)
1085#endif
1086 pxor %xmm0, %xmm0
1087 sub $0x1000, %edi
1088 jmp L(gobble_ashr_6)
1089
1090 .p2align 4
1091L(ashr_6_exittail):
1092 movdqa (%eax, %ecx), %xmm1
1093 psrldq $6, %xmm0
1094 psrldq $6, %xmm3
1095 jmp L(aftertail)
1096
1097/*
1098 * The following cases will be handled by ashr_7
1099 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1100 * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
1101 */
1102
1103 .p2align 4
1104L(ashr_7):
1105 mov $0xffff, %esi
1106 pxor %xmm0, %xmm0
1107 movdqa (%edx), %xmm2
1108 movdqa (%eax), %xmm1
1109 pcmpeqb %xmm1, %xmm0
1110 pslldq $9, %xmm2
1111 TOLOWER (%xmm1, %xmm2)
1112 pcmpeqb %xmm1, %xmm2
1113 psubb %xmm0, %xmm2
1114 pmovmskb %xmm2, %edi
1115 shr %cl, %esi
1116 shr %cl, %edi
1117 sub %edi, %esi
1118 lea -9(%ecx), %edi
1119 jnz L(less32bytes)
1120
1121 UPDATE_STRNCMP_COUNTER
1122
1123 movdqa (%edx), %xmm3
1124 pxor %xmm0, %xmm0
1125 mov $16, %ecx
1126 orl $7, FLAGS
1127 lea 8(%edx), %edi
1128 and $0xfff, %edi
1129 sub $0x1000, %edi
1130
1131 .p2align 4
1132L(loop_ashr_7):
1133 add $16, %edi
1134 jg L(nibble_ashr_7)
1135
1136L(gobble_ashr_7):
1137 movdqa (%eax, %ecx), %xmm1
1138 movdqa (%edx, %ecx), %xmm2
1139 movdqa %xmm2, %xmm4
1140
1141 palignr $7, %xmm3, %xmm2
1142 TOLOWER (%xmm1, %xmm2)
1143
1144 pcmpeqb %xmm1, %xmm0
1145 pcmpeqb %xmm2, %xmm1
1146 psubb %xmm0, %xmm1
1147 pmovmskb %xmm1, %esi
1148 sub $0xffff, %esi
1149 jnz L(exit)
1150
1151#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1152 cmp $16, REM
1153 lea -16(REM), REM
1154 jbe L(more8byteseq)
1155#endif
1156
1157 add $16, %ecx
1158 movdqa %xmm4, %xmm3
1159
1160 add $16, %edi
1161 jg L(nibble_ashr_7)
1162
1163 movdqa (%eax, %ecx), %xmm1
1164 movdqa (%edx, %ecx), %xmm2
1165 movdqa %xmm2, %xmm4
1166
1167 palignr $7, %xmm3, %xmm2
1168 TOLOWER (%xmm1, %xmm2)
1169
1170 pcmpeqb %xmm1, %xmm0
1171 pcmpeqb %xmm2, %xmm1
1172 psubb %xmm0, %xmm1
1173 pmovmskb %xmm1, %esi
1174 sub $0xffff, %esi
1175 jnz L(exit)
1176
1177#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1178 cmp $16, REM
1179 lea -16(REM), REM
1180 jbe L(more8byteseq)
1181#endif
1182
1183 add $16, %ecx
1184 movdqa %xmm4, %xmm3
1185 jmp L(loop_ashr_7)
1186
1187 .p2align 4
1188L(nibble_ashr_7):
1189 pcmpeqb %xmm3, %xmm0
1190 pmovmskb %xmm0, %esi
1191 test $0xff80, %esi
1192 jnz L(ashr_7_exittail)
1193
1194#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1195 cmp $9, REM
1196 jbe L(ashr_7_exittail)
1197#endif
1198 pxor %xmm0, %xmm0
1199 pxor %xmm0, %xmm0
1200 sub $0x1000, %edi
1201 jmp L(gobble_ashr_7)
1202
1203 .p2align 4
1204L(ashr_7_exittail):
1205 movdqa (%eax, %ecx), %xmm1
1206 psrldq $7, %xmm0
1207 psrldq $7, %xmm3
1208 jmp L(aftertail)
1209
1210/*
1211 * The following cases will be handled by ashr_8
1212 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1213 * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
1214 */
1215 .p2align 4
1216L(ashr_8):
1217 mov $0xffff, %esi
1218 pxor %xmm0, %xmm0
1219 movdqa (%edx), %xmm2
1220 movdqa (%eax), %xmm1
1221 pcmpeqb %xmm1, %xmm0
1222 pslldq $8, %xmm2
1223 TOLOWER (%xmm1, %xmm2)
1224 pcmpeqb %xmm1, %xmm2
1225 psubb %xmm0, %xmm2
1226 pmovmskb %xmm2, %edi
1227 shr %cl, %esi
1228 shr %cl, %edi
1229 sub %edi, %esi
1230 lea -8(%ecx), %edi
1231 jnz L(less32bytes)
1232
1233 UPDATE_STRNCMP_COUNTER
1234
1235 movdqa (%edx), %xmm3
1236 pxor %xmm0, %xmm0
1237 mov $16, %ecx
1238 orl $8, FLAGS
1239 lea 8(%edx), %edi
1240 and $0xfff, %edi
1241 sub $0x1000, %edi
1242
1243 .p2align 4
1244L(loop_ashr_8):
1245 add $16, %edi
1246 jg L(nibble_ashr_8)
1247
1248L(gobble_ashr_8):
1249 movdqa (%eax, %ecx), %xmm1
1250 movdqa (%edx, %ecx), %xmm2
1251 movdqa %xmm2, %xmm4
1252
1253 palignr $8, %xmm3, %xmm2
1254 TOLOWER (%xmm1, %xmm2)
1255
1256 pcmpeqb %xmm1, %xmm0
1257 pcmpeqb %xmm2, %xmm1
1258 psubb %xmm0, %xmm1
1259 pmovmskb %xmm1, %esi
1260 sub $0xffff, %esi
1261 jnz L(exit)
1262
1263#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1264 cmp $16, REM
1265 lea -16(REM), REM
1266 jbe L(more8byteseq)
1267#endif
1268 add $16, %ecx
1269 movdqa %xmm4, %xmm3
1270
1271 add $16, %edi
1272 jg L(nibble_ashr_8)
1273
1274 movdqa (%eax, %ecx), %xmm1
1275 movdqa (%edx, %ecx), %xmm2
1276 movdqa %xmm2, %xmm4
1277
1278 palignr $8, %xmm3, %xmm2
1279 TOLOWER (%xmm1, %xmm2)
1280
1281 pcmpeqb %xmm1, %xmm0
1282 pcmpeqb %xmm2, %xmm1
1283 psubb %xmm0, %xmm1
1284 pmovmskb %xmm1, %esi
1285 sub $0xffff, %esi
1286 jnz L(exit)
1287
1288#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1289 cmp $16, REM
1290 lea -16(REM), REM
1291 jbe L(more8byteseq)
1292#endif
1293 add $16, %ecx
1294 movdqa %xmm4, %xmm3
1295 jmp L(loop_ashr_8)
1296
1297 .p2align 4
1298L(nibble_ashr_8):
1299 pcmpeqb %xmm3, %xmm0
1300 pmovmskb %xmm0, %esi
1301 test $0xff00, %esi
1302 jnz L(ashr_8_exittail)
1303
1304#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1305 cmp $8, REM
1306 jbe L(ashr_8_exittail)
1307#endif
1308 pxor %xmm0, %xmm0
1309 pxor %xmm0, %xmm0
1310 sub $0x1000, %edi
1311 jmp L(gobble_ashr_8)
1312
1313 .p2align 4
1314L(ashr_8_exittail):
1315 movdqa (%eax, %ecx), %xmm1
1316 psrldq $8, %xmm0
1317 psrldq $8, %xmm3
1318 jmp L(aftertail)
1319
1320/*
1321 * The following cases will be handled by ashr_9
1322 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1323 * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
1324 */
1325 .p2align 4
1326L(ashr_9):
1327 mov $0xffff, %esi
1328 pxor %xmm0, %xmm0
1329 movdqa (%edx), %xmm2
1330 movdqa (%eax), %xmm1
1331 pcmpeqb %xmm1, %xmm0
1332 pslldq $7, %xmm2
1333 TOLOWER (%xmm1, %xmm2)
1334 pcmpeqb %xmm1, %xmm2
1335 psubb %xmm0, %xmm2
1336 pmovmskb %xmm2, %edi
1337 shr %cl, %esi
1338 shr %cl, %edi
1339 sub %edi, %esi
1340 lea -7(%ecx), %edi
1341 jnz L(less32bytes)
1342
1343 UPDATE_STRNCMP_COUNTER
1344
1345 movdqa (%edx), %xmm3
1346 pxor %xmm0, %xmm0
1347 mov $16, %ecx
1348 orl $9, FLAGS
1349 lea 9(%edx), %edi
1350 and $0xfff, %edi
1351 sub $0x1000, %edi
1352
1353 .p2align 4
1354L(loop_ashr_9):
1355 add $16, %edi
1356 jg L(nibble_ashr_9)
1357
1358L(gobble_ashr_9):
1359 movdqa (%eax, %ecx), %xmm1
1360 movdqa (%edx, %ecx), %xmm2
1361 movdqa %xmm2, %xmm4
1362
1363 palignr $9, %xmm3, %xmm2
1364 TOLOWER (%xmm1, %xmm2)
1365
1366 pcmpeqb %xmm1, %xmm0
1367 pcmpeqb %xmm2, %xmm1
1368 psubb %xmm0, %xmm1
1369 pmovmskb %xmm1, %esi
1370 sub $0xffff, %esi
1371 jnz L(exit)
1372
1373#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1374 cmp $16, REM
1375 lea -16(REM), REM
1376 jbe L(more8byteseq)
1377#endif
1378 add $16, %ecx
1379 movdqa %xmm4, %xmm3
1380
1381 add $16, %edi
1382 jg L(nibble_ashr_9)
1383
1384 movdqa (%eax, %ecx), %xmm1
1385 movdqa (%edx, %ecx), %xmm2
1386 movdqa %xmm2, %xmm4
1387
1388 palignr $9, %xmm3, %xmm2
1389 TOLOWER (%xmm1, %xmm2)
1390
1391 pcmpeqb %xmm1, %xmm0
1392 pcmpeqb %xmm2, %xmm1
1393 psubb %xmm0, %xmm1
1394 pmovmskb %xmm1, %esi
1395 sub $0xffff, %esi
1396 jnz L(exit)
1397
1398#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1399 cmp $16, REM
1400 lea -16(REM), REM
1401 jbe L(more8byteseq)
1402#endif
1403 add $16, %ecx
1404 movdqa %xmm4, %xmm3
1405 jmp L(loop_ashr_9)
1406
1407 .p2align 4
1408L(nibble_ashr_9):
1409 pcmpeqb %xmm3, %xmm0
1410 pmovmskb %xmm0, %esi
1411 test $0xfe00, %esi
1412 jnz L(ashr_9_exittail)
1413
1414#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1415 cmp $7, REM
1416 jbe L(ashr_9_exittail)
1417#endif
1418 pxor %xmm0, %xmm0
1419 sub $0x1000, %edi
1420 jmp L(gobble_ashr_9)
1421
1422 .p2align 4
1423L(ashr_9_exittail):
1424 movdqa (%eax, %ecx), %xmm1
1425 psrldq $9, %xmm0
1426 psrldq $9, %xmm3
1427 jmp L(aftertail)
1428
1429/*
1430 * The following cases will be handled by ashr_10
1431 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1432 * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
1433 */
/* Head comparison for a relative misalignment of 10: shift the first
   %edx chunk left by 16-10 = 6 bytes so it lines up with the %eax
   chunk, compare, and bail to the byte-wise tail on any difference
   or NUL within the overlapping window.  */
1434 .p2align 4
1435L(ashr_10):
1436 mov $0xffff, %esi
1437 pxor %xmm0, %xmm0
1438 movdqa (%edx), %xmm2
1439 movdqa (%eax), %xmm1
1440 pcmpeqb %xmm1, %xmm0
1441 pslldq $6, %xmm2
1442 TOLOWER (%xmm1, %xmm2)
1443 pcmpeqb %xmm1, %xmm2
1444 psubb %xmm0, %xmm2
1445 pmovmskb %xmm2, %edi
1446 shr %cl, %esi
1447 shr %cl, %edi
1448 sub %edi, %esi
1449 lea -6(%ecx), %edi
1450 jnz L(less32bytes)
1451
1452 UPDATE_STRNCMP_COUNTER
1453
/* Main-loop setup; same scheme as the other ashr_N loops: %xmm3 =
   previous aligned chunk of %edx, %edi = page-crossing sentinel,
   FLAGS records the shift amount (10).  */
1454 movdqa (%edx), %xmm3
1455 pxor %xmm0, %xmm0
1456 mov $16, %ecx
1457 orl $10, FLAGS
1458 lea 10(%edx), %edi
1459 and $0xfff, %edi
1460 sub $0x1000, %edi
1461
1462 .p2align 4
1463L(loop_ashr_10):
1464 add $16, %edi
1465 jg L(nibble_ashr_10)
1466
/* 16 bytes per iteration, unrolled twice; %esi == 0xffff means all
   bytes equal and no NUL seen.  */
1467L(gobble_ashr_10):
1468 movdqa (%eax, %ecx), %xmm1
1469 movdqa (%edx, %ecx), %xmm2
1470 movdqa %xmm2, %xmm4
1471
1472 palignr $10, %xmm3, %xmm2
1473 TOLOWER (%xmm1, %xmm2)
1474
1475 pcmpeqb %xmm1, %xmm0
1476 pcmpeqb %xmm2, %xmm1
1477 psubb %xmm0, %xmm1
1478 pmovmskb %xmm1, %esi
1479 sub $0xffff, %esi
1480 jnz L(exit)
1481
1482#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1483 cmp $16, REM
1484 lea -16(REM), REM
1485 jbe L(more8byteseq)
1486#endif
1487 add $16, %ecx
1488 movdqa %xmm4, %xmm3
1489
1490 add $16, %edi
1491 jg L(nibble_ashr_10)
1492
1493 movdqa (%eax, %ecx), %xmm1
1494 movdqa (%edx, %ecx), %xmm2
1495 movdqa %xmm2, %xmm4
1496
1497 palignr $10, %xmm3, %xmm2
1498 TOLOWER (%xmm1, %xmm2)
1499
1500 pcmpeqb %xmm1, %xmm0
1501 pcmpeqb %xmm2, %xmm1
1502 psubb %xmm0, %xmm1
1503 pmovmskb %xmm1, %esi
1504 sub $0xffff, %esi
1505 jnz L(exit)
1506
1507#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1508 cmp $16, REM
1509 lea -16(REM), REM
1510 jbe L(more8byteseq)
1511#endif
1512 add $16, %ecx
1513 movdqa %xmm4, %xmm3
1514 jmp L(loop_ashr_10)
1515
/* Page-boundary guard: test the top 16-10 = 6 bytes of the cached
   chunk (mask 0xfc00) for NUL before reading across the page.  */
1516 .p2align 4
1517L(nibble_ashr_10):
1518 pcmpeqb %xmm3, %xmm0
1519 pmovmskb %xmm0, %esi
1520 test $0xfc00, %esi
1521 jnz L(ashr_10_exittail)
1522
1523#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1524 cmp $6, REM
1525 jbe L(ashr_10_exittail)
1526#endif
1527 pxor %xmm0, %xmm0
1528 sub $0x1000, %edi
1529 jmp L(gobble_ashr_10)
1530
1531 .p2align 4
1532L(ashr_10_exittail):
1533 movdqa (%eax, %ecx), %xmm1
1534 psrldq $10, %xmm0
1535 psrldq $10, %xmm3
1536 jmp L(aftertail)
1537
1538/*
1539 * The following cases will be handled by ashr_11
1540 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1541 * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
1542 */
/* Head comparison for a relative misalignment of 11: pslldq $5
   (16-11) aligns the first %edx chunk with the %eax chunk.  */
1543 .p2align 4
1544L(ashr_11):
1545 mov $0xffff, %esi
1546 pxor %xmm0, %xmm0
1547 movdqa (%edx), %xmm2
1548 movdqa (%eax), %xmm1
1549 pcmpeqb %xmm1, %xmm0
1550 pslldq $5, %xmm2
1551 TOLOWER (%xmm1, %xmm2)
1552 pcmpeqb %xmm1, %xmm2
1553 psubb %xmm0, %xmm2
1554 pmovmskb %xmm2, %edi
1555 shr %cl, %esi
1556 shr %cl, %edi
1557 sub %edi, %esi
1558 lea -5(%ecx), %edi
1559 jnz L(less32bytes)
1560
1561 UPDATE_STRNCMP_COUNTER
1562
/* Main-loop setup; see ashr_9 for the register roles.  FLAGS records
   the shift amount (11) for the exit path.  */
1563 movdqa (%edx), %xmm3
1564 pxor %xmm0, %xmm0
1565 mov $16, %ecx
1566 orl $11, FLAGS
1567 lea 11(%edx), %edi
1568 and $0xfff, %edi
1569 sub $0x1000, %edi
1570
1571 .p2align 4
1572L(loop_ashr_11):
1573 add $16, %edi
1574 jg L(nibble_ashr_11)
1575
1576L(gobble_ashr_11):
1577 movdqa (%eax, %ecx), %xmm1
1578 movdqa (%edx, %ecx), %xmm2
1579 movdqa %xmm2, %xmm4
1580
1581 palignr $11, %xmm3, %xmm2
1582 TOLOWER (%xmm1, %xmm2)
1583
1584 pcmpeqb %xmm1, %xmm0
1585 pcmpeqb %xmm2, %xmm1
1586 psubb %xmm0, %xmm1
1587 pmovmskb %xmm1, %esi
1588 sub $0xffff, %esi
1589 jnz L(exit)
1590
1591#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1592 cmp $16, REM
1593 lea -16(REM), REM
1594 jbe L(more8byteseq)
1595#endif
1596 add $16, %ecx
1597 movdqa %xmm4, %xmm3
1598
1599 add $16, %edi
1600 jg L(nibble_ashr_11)
1601
1602 movdqa (%eax, %ecx), %xmm1
1603 movdqa (%edx, %ecx), %xmm2
1604 movdqa %xmm2, %xmm4
1605
1606 palignr $11, %xmm3, %xmm2
1607 TOLOWER (%xmm1, %xmm2)
1608
1609 pcmpeqb %xmm1, %xmm0
1610 pcmpeqb %xmm2, %xmm1
1611 psubb %xmm0, %xmm1
1612 pmovmskb %xmm1, %esi
1613 sub $0xffff, %esi
1614 jnz L(exit)
1615
1616#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1617 cmp $16, REM
1618 lea -16(REM), REM
1619 jbe L(more8byteseq)
1620#endif
1621 add $16, %ecx
1622 movdqa %xmm4, %xmm3
1623 jmp L(loop_ashr_11)
1624
/* Page-boundary guard: mask 0xf800 covers the top 16-11 = 5 bytes of
   the cached chunk.  */
1625 .p2align 4
1626L(nibble_ashr_11):
1627 pcmpeqb %xmm3, %xmm0
1628 pmovmskb %xmm0, %esi
1629 test $0xf800, %esi
1630 jnz L(ashr_11_exittail)
1631
1632#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1633 cmp $5, REM
1634 jbe L(ashr_11_exittail)
1635#endif
1636 pxor %xmm0, %xmm0
1637 sub $0x1000, %edi
1638 jmp L(gobble_ashr_11)
1639
1640 .p2align 4
1641L(ashr_11_exittail):
1642 movdqa (%eax, %ecx), %xmm1
1643 psrldq $11, %xmm0
1644 psrldq $11, %xmm3
1645 jmp L(aftertail)
1646
1647/*
1648 * The following cases will be handled by ashr_12
1649 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1650 * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
1651 */
/* Head comparison for a relative misalignment of 12: pslldq $4
   (16-12) aligns the first %edx chunk with the %eax chunk.  */
1652 .p2align 4
1653L(ashr_12):
1654 mov $0xffff, %esi
1655 pxor %xmm0, %xmm0
1656 movdqa (%edx), %xmm2
1657 movdqa (%eax), %xmm1
1658 pcmpeqb %xmm1, %xmm0
1659 pslldq $4, %xmm2
1660 TOLOWER (%xmm1, %xmm2)
1661 pcmpeqb %xmm1, %xmm2
1662 psubb %xmm0, %xmm2
1663 pmovmskb %xmm2, %edi
1664 shr %cl, %esi
1665 shr %cl, %edi
1666 sub %edi, %esi
1667 lea -4(%ecx), %edi
1668 jnz L(less32bytes)
1669
1670 UPDATE_STRNCMP_COUNTER
1671
/* Main-loop setup; see ashr_9 for the register roles.  FLAGS records
   the shift amount (12) for the exit path.  */
1672 movdqa (%edx), %xmm3
1673 pxor %xmm0, %xmm0
1674 mov $16, %ecx
1675 orl $12, FLAGS
1676 lea 12(%edx), %edi
1677 and $0xfff, %edi
1678 sub $0x1000, %edi
1679
1680 .p2align 4
1681L(loop_ashr_12):
1682 add $16, %edi
1683 jg L(nibble_ashr_12)
1684
1685L(gobble_ashr_12):
1686 movdqa (%eax, %ecx), %xmm1
1687 movdqa (%edx, %ecx), %xmm2
1688 movdqa %xmm2, %xmm4
1689
1690 palignr $12, %xmm3, %xmm2
1691 TOLOWER (%xmm1, %xmm2)
1692
1693 pcmpeqb %xmm1, %xmm0
1694 pcmpeqb %xmm2, %xmm1
1695 psubb %xmm0, %xmm1
1696 pmovmskb %xmm1, %esi
1697 sub $0xffff, %esi
1698 jnz L(exit)
1699
1700#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1701 cmp $16, REM
1702 lea -16(REM), REM
1703 jbe L(more8byteseq)
1704#endif
1705
1706 add $16, %ecx
1707 movdqa %xmm4, %xmm3
1708
1709 add $16, %edi
1710 jg L(nibble_ashr_12)
1711
1712 movdqa (%eax, %ecx), %xmm1
1713 movdqa (%edx, %ecx), %xmm2
1714 movdqa %xmm2, %xmm4
1715
1716 palignr $12, %xmm3, %xmm2
1717 TOLOWER (%xmm1, %xmm2)
1718
1719 pcmpeqb %xmm1, %xmm0
1720 pcmpeqb %xmm2, %xmm1
1721 psubb %xmm0, %xmm1
1722 pmovmskb %xmm1, %esi
1723 sub $0xffff, %esi
1724 jnz L(exit)
1725
1726#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1727 cmp $16, REM
1728 lea -16(REM), REM
1729 jbe L(more8byteseq)
1730#endif
1731 add $16, %ecx
1732 movdqa %xmm4, %xmm3
1733 jmp L(loop_ashr_12)
1734
/* Page-boundary guard: mask 0xf000 covers the top 16-12 = 4 bytes of
   the cached chunk.  */
1735 .p2align 4
1736L(nibble_ashr_12):
1737 pcmpeqb %xmm3, %xmm0
1738 pmovmskb %xmm0, %esi
1739 test $0xf000, %esi
1740 jnz L(ashr_12_exittail)
1741
1742#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1743 cmp $4, REM
1744 jbe L(ashr_12_exittail)
1745#endif
1746 pxor %xmm0, %xmm0
1747 sub $0x1000, %edi
1748 jmp L(gobble_ashr_12)
1749
1750 .p2align 4
1751L(ashr_12_exittail):
1752 movdqa (%eax, %ecx), %xmm1
1753 psrldq $12, %xmm0
1754 psrldq $12, %xmm3
1755 jmp L(aftertail)
1756
1757/*
1758 * The following cases will be handled by ashr_13
1759 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1760 * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
1761 */
/* Head comparison for a relative misalignment of 13: pslldq $3
   (16-13) aligns the first %edx chunk with the %eax chunk.  */
1762 .p2align 4
1763L(ashr_13):
1764 mov $0xffff, %esi
1765 pxor %xmm0, %xmm0
1766 movdqa (%edx), %xmm2
1767 movdqa (%eax), %xmm1
1768 pcmpeqb %xmm1, %xmm0
1769 pslldq $3, %xmm2
1770 TOLOWER (%xmm1, %xmm2)
1771 pcmpeqb %xmm1, %xmm2
1772 psubb %xmm0, %xmm2
1773 pmovmskb %xmm2, %edi
1774 shr %cl, %esi
1775 shr %cl, %edi
1776 sub %edi, %esi
1777 lea -3(%ecx), %edi
1778 jnz L(less32bytes)
1779
1780 UPDATE_STRNCMP_COUNTER
1781
/* Main-loop setup; see ashr_9 for the register roles.  FLAGS records
   the shift amount (13) for the exit path.  */
1782 movdqa (%edx), %xmm3
1783 pxor %xmm0, %xmm0
1784 mov $16, %ecx
1785 orl $13, FLAGS
1786 lea 13(%edx), %edi
1787 and $0xfff, %edi
1788 sub $0x1000, %edi
1789
1790 .p2align 4
1791L(loop_ashr_13):
1792 add $16, %edi
1793 jg L(nibble_ashr_13)
1794
1795L(gobble_ashr_13):
1796 movdqa (%eax, %ecx), %xmm1
1797 movdqa (%edx, %ecx), %xmm2
1798 movdqa %xmm2, %xmm4
1799
1800 palignr $13, %xmm3, %xmm2
1801 TOLOWER (%xmm1, %xmm2)
1802
1803 pcmpeqb %xmm1, %xmm0
1804 pcmpeqb %xmm2, %xmm1
1805 psubb %xmm0, %xmm1
1806 pmovmskb %xmm1, %esi
1807 sub $0xffff, %esi
1808 jnz L(exit)
1809
1810#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1811 cmp $16, REM
1812 lea -16(REM), REM
1813 jbe L(more8byteseq)
1814#endif
1815 add $16, %ecx
1816 movdqa %xmm4, %xmm3
1817
1818 add $16, %edi
1819 jg L(nibble_ashr_13)
1820
1821 movdqa (%eax, %ecx), %xmm1
1822 movdqa (%edx, %ecx), %xmm2
1823 movdqa %xmm2, %xmm4
1824
1825 palignr $13, %xmm3, %xmm2
1826 TOLOWER (%xmm1, %xmm2)
1827
1828 pcmpeqb %xmm1, %xmm0
1829 pcmpeqb %xmm2, %xmm1
1830 psubb %xmm0, %xmm1
1831 pmovmskb %xmm1, %esi
1832 sub $0xffff, %esi
1833 jnz L(exit)
1834
1835#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1836 cmp $16, REM
1837 lea -16(REM), REM
1838 jbe L(more8byteseq)
1839#endif
1840 add $16, %ecx
1841 movdqa %xmm4, %xmm3
1842 jmp L(loop_ashr_13)
1843
/* Page-boundary guard: mask 0xe000 covers the top 16-13 = 3 bytes of
   the cached chunk.  */
1844 .p2align 4
1845L(nibble_ashr_13):
1846 pcmpeqb %xmm3, %xmm0
1847 pmovmskb %xmm0, %esi
1848 test $0xe000, %esi
1849 jnz L(ashr_13_exittail)
1850
1851#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1852 cmp $3, REM
1853 jbe L(ashr_13_exittail)
1854#endif
1855 pxor %xmm0, %xmm0
1856 sub $0x1000, %edi
1857 jmp L(gobble_ashr_13)
1858
1859 .p2align 4
1860L(ashr_13_exittail):
1861 movdqa (%eax, %ecx), %xmm1
1862 psrldq $13, %xmm0
1863 psrldq $13, %xmm3
1864 jmp L(aftertail)
1865
1866/*
1867 * The following cases will be handled by ashr_14
1868 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1869 * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
1870 */
/* Head comparison for a relative misalignment of 14: pslldq $2
   (16-14) aligns the first %edx chunk with the %eax chunk.  */
1871 .p2align 4
1872L(ashr_14):
1873 mov $0xffff, %esi
1874 pxor %xmm0, %xmm0
1875 movdqa (%edx), %xmm2
1876 movdqa (%eax), %xmm1
1877 pcmpeqb %xmm1, %xmm0
1878 pslldq $2, %xmm2
1879 TOLOWER (%xmm1, %xmm2)
1880 pcmpeqb %xmm1, %xmm2
1881 psubb %xmm0, %xmm2
1882 pmovmskb %xmm2, %edi
1883 shr %cl, %esi
1884 shr %cl, %edi
1885 sub %edi, %esi
1886 lea -2(%ecx), %edi
1887 jnz L(less32bytes)
1888
1889 UPDATE_STRNCMP_COUNTER
1890
/* Main-loop setup; see ashr_9 for the register roles.  FLAGS records
   the shift amount (14) for the exit path.  */
1891 movdqa (%edx), %xmm3
1892 pxor %xmm0, %xmm0
1893 mov $16, %ecx
1894 orl $14, FLAGS
1895 lea 14(%edx), %edi
1896 and $0xfff, %edi
1897 sub $0x1000, %edi
1898
1899 .p2align 4
1900L(loop_ashr_14):
1901 add $16, %edi
1902 jg L(nibble_ashr_14)
1903
1904L(gobble_ashr_14):
1905 movdqa (%eax, %ecx), %xmm1
1906 movdqa (%edx, %ecx), %xmm2
1907 movdqa %xmm2, %xmm4
1908
1909 palignr $14, %xmm3, %xmm2
1910 TOLOWER (%xmm1, %xmm2)
1911
1912 pcmpeqb %xmm1, %xmm0
1913 pcmpeqb %xmm2, %xmm1
1914 psubb %xmm0, %xmm1
1915 pmovmskb %xmm1, %esi
1916 sub $0xffff, %esi
1917 jnz L(exit)
1918
1919#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1920 cmp $16, REM
1921 lea -16(REM), REM
1922 jbe L(more8byteseq)
1923#endif
1924 add $16, %ecx
1925 movdqa %xmm4, %xmm3
1926
1927 add $16, %edi
1928 jg L(nibble_ashr_14)
1929
1930 movdqa (%eax, %ecx), %xmm1
1931 movdqa (%edx, %ecx), %xmm2
1932 movdqa %xmm2, %xmm4
1933
1934 palignr $14, %xmm3, %xmm2
1935 TOLOWER (%xmm1, %xmm2)
1936
1937 pcmpeqb %xmm1, %xmm0
1938 pcmpeqb %xmm2, %xmm1
1939 psubb %xmm0, %xmm1
1940 pmovmskb %xmm1, %esi
1941 sub $0xffff, %esi
1942 jnz L(exit)
1943
1944#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1945 cmp $16, REM
1946 lea -16(REM), REM
1947 jbe L(more8byteseq)
1948#endif
1949 add $16, %ecx
1950 movdqa %xmm4, %xmm3
1951 jmp L(loop_ashr_14)
1952
/* Page-boundary guard: mask 0xc000 covers the top 16-14 = 2 bytes of
   the cached chunk.  */
1953 .p2align 4
1954L(nibble_ashr_14):
1955 pcmpeqb %xmm3, %xmm0
1956 pmovmskb %xmm0, %esi
1957 test $0xc000, %esi
1958 jnz L(ashr_14_exittail)
1959
1960#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1961 cmp $2, REM
1962 jbe L(ashr_14_exittail)
1963#endif
1964 pxor %xmm0, %xmm0
1965 sub $0x1000, %edi
1966 jmp L(gobble_ashr_14)
1967
1968 .p2align 4
1969L(ashr_14_exittail):
1970 movdqa (%eax, %ecx), %xmm1
1971 psrldq $14, %xmm0
1972 psrldq $14, %xmm3
1973 jmp L(aftertail)
1974
1975/*
1976 * The following cases will be handled by ashr_15
1977 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1978 * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
1979 */
1980
/* Head comparison for a relative misalignment of 15: pslldq $1
   (16-15) aligns the first %edx chunk with the %eax chunk.  */
1981 .p2align 4
1982L(ashr_15):
1983 mov $0xffff, %esi
1984 pxor %xmm0, %xmm0
1985 movdqa (%edx), %xmm2
1986 movdqa (%eax), %xmm1
1987 pcmpeqb %xmm1, %xmm0
1988 pslldq $1, %xmm2
1989 TOLOWER (%xmm1, %xmm2)
1990 pcmpeqb %xmm1, %xmm2
1991 psubb %xmm0, %xmm2
1992 pmovmskb %xmm2, %edi
1993 shr %cl, %esi
1994 shr %cl, %edi
1995 sub %edi, %esi
1996 lea -1(%ecx), %edi
1997 jnz L(less32bytes)
1998
1999 UPDATE_STRNCMP_COUNTER
2000
/* Main-loop setup; see ashr_9 for the register roles.  FLAGS records
   the shift amount (15) for the exit path.  */
2001 movdqa (%edx), %xmm3
2002 pxor %xmm0, %xmm0
2003 mov $16, %ecx
2004 orl $15, FLAGS
2005 lea 15(%edx), %edi
2006 and $0xfff, %edi
2007 sub $0x1000, %edi
2008
2009 .p2align 4
2010L(loop_ashr_15):
2011 add $16, %edi
2012 jg L(nibble_ashr_15)
2013
2014L(gobble_ashr_15):
2015 movdqa (%eax, %ecx), %xmm1
2016 movdqa (%edx, %ecx), %xmm2
2017 movdqa %xmm2, %xmm4
2018
2019 palignr $15, %xmm3, %xmm2
2020 TOLOWER (%xmm1, %xmm2)
2021
2022 pcmpeqb %xmm1, %xmm0
2023 pcmpeqb %xmm2, %xmm1
2024 psubb %xmm0, %xmm1
2025 pmovmskb %xmm1, %esi
2026 sub $0xffff, %esi
2027 jnz L(exit)
2028
2029#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2030 cmp $16, REM
2031 lea -16(REM), REM
2032 jbe L(more8byteseq)
2033#endif
2034 add $16, %ecx
2035 movdqa %xmm4, %xmm3
2036
2037 add $16, %edi
2038 jg L(nibble_ashr_15)
2039
2040 movdqa (%eax, %ecx), %xmm1
2041 movdqa (%edx, %ecx), %xmm2
2042 movdqa %xmm2, %xmm4
2043
2044 palignr $15, %xmm3, %xmm2
2045 TOLOWER (%xmm1, %xmm2)
2046
2047 pcmpeqb %xmm1, %xmm0
2048 pcmpeqb %xmm2, %xmm1
2049 psubb %xmm0, %xmm1
2050 pmovmskb %xmm1, %esi
2051 sub $0xffff, %esi
2052 jnz L(exit)
2053
2054#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2055 cmp $16, REM
2056 lea -16(REM), REM
2057 jbe L(more8byteseq)
2058#endif
2059 add $16, %ecx
2060 movdqa %xmm4, %xmm3
2061 jmp L(loop_ashr_15)
2062
/* Page-boundary guard: mask 0x8000 covers only the last byte of the
   cached chunk (16-15 = 1).  */
2063 .p2align 4
2064L(nibble_ashr_15):
2065 pcmpeqb %xmm3, %xmm0
2066 pmovmskb %xmm0, %esi
2067 test $0x8000, %esi
2068 jnz L(ashr_15_exittail)
2069
2070#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2071 cmp $1, REM
2072 jbe L(ashr_15_exittail)
2073#endif
2074 pxor %xmm0, %xmm0
2075 sub $0x1000, %edi
2076 jmp L(gobble_ashr_15)
2077
2078 .p2align 4
2079L(ashr_15_exittail):
2080 movdqa (%eax, %ecx), %xmm1
2081 psrldq $15, %xmm0
2082 psrldq $15, %xmm3
2083 jmp L(aftertail)
2084
/* Common tail shared by every ashr_N exittail: compare the leftover
   bytes (%xmm1 vs. the shifted %xmm3), then fall into L(exit), which
   translates the FLAGS-encoded shift (low 5 bits) and %ecx offset
   into the absolute byte positions for the final byte-wise compare.
   Bit 0x20 of FLAGS means the two string arguments were swapped
   earlier, so swap them back before returning.  */
2085 .p2align 4
2086L(aftertail):
2087 TOLOWER (%xmm1, %xmm3)
2088 pcmpeqb %xmm3, %xmm1
2089 psubb %xmm0, %xmm1
2090 pmovmskb %xmm1, %esi
2091 not %esi
2092L(exit):
2093 mov FLAGS, %edi
2094 and $0x1f, %edi
2095 lea -16(%edi, %ecx), %edi
2096L(less32bytes):
2097 add %edi, %edx
2098 add %ecx, %eax
2099 testl $0x20, FLAGS
2100 jz L(ret2)
2101 xchg %eax, %edx
2102
/* Restore the registers pushed in the entry sequence; %esi still
   holds the mismatch/NUL bit mask consumed by L(less16bytes).  */
2103 .p2align 4
2104L(ret2):
2105 mov %esi, %ecx
2106#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2107 addl $4, %esp
2108 cfi_adjust_cfa_offset (-4)
2109#endif
2110 POP (%esi)
2111 POP (%edi)
2112#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2113 POP (FLAGS)
2114#endif
/* Byte-wise resolution of a mismatch/NUL within 16 bytes: %cl holds
   the mask for the first 8 byte positions (bit i set = difference or
   NUL at offset i); scan for the lowest set bit.  A clear low byte
   means the event is in positions 8..15, handled by
   L(2next_8_bytes).  */
2115L(less16bytes):
2116 test %cl, %cl
2117 jz L(2next_8_bytes)
2118
2119 test $0x01, %cl
2120 jnz L(Byte0)
2121
2122 test $0x02, %cl
2123 jnz L(Byte1)
2124
2125 test $0x04, %cl
2126 jnz L(Byte2)
2127
2128 test $0x08, %cl
2129 jnz L(Byte3)
2130
2131 test $0x10, %cl
2132 jnz L(Byte4)
2133
2134 test $0x20, %cl
2135 jnz L(Byte5)
2136
2137 test $0x40, %cl
2138 jnz L(Byte6)
/* Only bit 7 can remain set: compare offset 7 inline.  */
2139#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2140 cmp $7, REM
2141 jbe L(eq)
2142#endif
2143
2144 movzbl 7(%eax), %ecx
2145 movzbl 7(%edx), %eax
2146#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
/* Case-insensitive variants: map both bytes through the C-locale
   tolower table before subtracting.  */
2147# ifdef PIC
2148 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2149 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
2150# else
2151 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2152 movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
2153# endif
2154#endif
2155
2156 sub %ecx, %eax
2157 RETURN
2158
/* L(Byte0) .. L(Byte6): return the signed difference of the bytes at
   the given offset.  Each case first checks the strncmp length bound
   (REM) and, for the case-insensitive variants, folds both bytes
   through the C-locale tolower table.  Note L(2next_8_bytes) jumps
   back here after advancing both pointers by 8, so these labels also
   serve offsets 8..14.  */
2159L(Byte0):
2160#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2161 cmp $0, REM
2162 jbe L(eq)
2163#endif
2164 movzbl (%eax), %ecx
2165 movzbl (%edx), %eax
2166
2167#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2168# ifdef PIC
2169 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2170 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
2171# else
2172 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2173 movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
2174# endif
2175#endif
2176
2177 sub %ecx, %eax
2178 RETURN
2179
2180L(Byte1):
2181#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2182 cmp $1, REM
2183 jbe L(eq)
2184#endif
2185 movzbl 1(%eax), %ecx
2186 movzbl 1(%edx), %eax
2187
2188#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2189# ifdef PIC
2190 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2191 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
2192# else
2193 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2194 movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
2195# endif
2196#endif
2197
2198 sub %ecx, %eax
2199 RETURN
2200
2201L(Byte2):
2202#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2203 cmp $2, REM
2204 jbe L(eq)
2205#endif
2206 movzbl 2(%eax), %ecx
2207 movzbl 2(%edx), %eax
2208
2209#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2210# ifdef PIC
2211 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2212 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
2213# else
2214 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2215 movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
2216# endif
2217#endif
2218
2219 sub %ecx, %eax
2220 RETURN
2221
2222L(Byte3):
2223#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2224 cmp $3, REM
2225 jbe L(eq)
2226#endif
2227 movzbl 3(%eax), %ecx
2228 movzbl 3(%edx), %eax
2229
2230#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2231# ifdef PIC
2232 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2233 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
2234# else
2235 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2236 movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
2237# endif
2238#endif
2239
2240 sub %ecx, %eax
2241 RETURN
2242
2243L(Byte4):
2244#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2245 cmp $4, REM
2246 jbe L(eq)
2247#endif
2248 movzbl 4(%eax), %ecx
2249 movzbl 4(%edx), %eax
2250
2251#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2252# ifdef PIC
2253 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2254 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
2255# else
2256 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2257 movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
2258# endif
2259#endif
2260
2261 sub %ecx, %eax
2262 RETURN
2263
2264L(Byte5):
2265#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2266 cmp $5, REM
2267 jbe L(eq)
2268#endif
2269 movzbl 5(%eax), %ecx
2270 movzbl 5(%edx), %eax
2271
2272#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2273# ifdef PIC
2274 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2275 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
2276# else
2277 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2278 movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
2279# endif
2280#endif
2281
2282 sub %ecx, %eax
2283 RETURN
2284
2285L(Byte6):
2286#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2287 cmp $6, REM
2288 jbe L(eq)
2289#endif
2290 movzbl 6(%eax), %ecx
2291 movzbl 6(%edx), %eax
2292
2293#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2294# ifdef PIC
2295 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2296 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
2297# else
2298 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2299 movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
2300# endif
2301#endif
2302
2303 sub %ecx, %eax
2304 RETURN
2305
/* The mismatch/NUL is in byte positions 8..15: advance both pointers
   by 8 and reuse the Byte0..Byte6 cases via the high mask byte %ch;
   the final case (bit 15, offset 7 after the adjustment) is handled
   inline below.  */
2306L(2next_8_bytes):
2307 add $8, %eax
2308 add $8, %edx
2309#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2310 cmp $8, REM
2311 lea -8(REM), REM
2312 jbe L(eq)
2313#endif
2314
2315 test $0x01, %ch
2316 jnz L(Byte0)
2317
2318 test $0x02, %ch
2319 jnz L(Byte1)
2320
2321 test $0x04, %ch
2322 jnz L(Byte2)
2323
2324 test $0x08, %ch
2325 jnz L(Byte3)
2326
2327 test $0x10, %ch
2328 jnz L(Byte4)
2329
2330 test $0x20, %ch
2331 jnz L(Byte5)
2332
2333 test $0x40, %ch
2334 jnz L(Byte6)
2335
2336#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2337 cmp $7, REM
2338 jbe L(eq)
2339#endif
2340 movzbl 7(%eax), %ecx
2341 movzbl 7(%edx), %eax
2342
2343#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2344# ifdef PIC
2345 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2346 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
2347# else
2348 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2349 movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
2350# endif
2351#endif
2352
2353 sub %ecx, %eax
2354 RETURN
2355
/* Unequal return path: the flags set by the caller's comparison are
   still live here — "ja" picks +1 for above, otherwise the result is
   negated to -1.  Pops mirror the pushes done at function entry for
   each variant before returning.  */
2356#ifdef USE_AS_STRNCMP
2357L(neq_sncmp):
2358#endif
2359L(neq):
2360 mov $1, %eax
2361 ja L(neq_bigger)
2362 neg %eax
2363L(neq_bigger):
2364#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2365 addl $4, %esp
2366 cfi_adjust_cfa_offset (-4)
2367#endif
2368#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2369 POP (REM)
2370#endif
2371#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2372# ifdef PIC
2373 POP (%ebx)
2374# endif
2375#endif
2376 ret
2377
/* strncmp/strncasecmp length-limit reached inside the SSE loops:
   restore the loop registers, then fall through into the common
   "equal" return (0 in %eax).  cfi_restore_state pairs with the
   cfi_remember_state done at function entry (outside this chunk).  */
2378#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2379 .p2align 4
2380 cfi_restore_state
2381L(more8byteseq):
2382
2383# ifdef USE_AS_STRNCASECMP_L
2384 addl $4, %esp
2385 cfi_adjust_cfa_offset (-4)
2386# endif
2387 POP (%esi)
2388 POP (%edi)
2389# ifdef USE_AS_STRNCMP
2390 POP (FLAGS)
2391# endif
2392#endif
2393
2394#ifdef USE_AS_STRNCMP
2395L(eq_sncmp):
2396#endif
2397L(eq):
2398
2399#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2400 POP (REM)
2401#endif
2402#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2403# ifdef PIC
2404 POP (%ebx)
2405# endif
2406#endif
2407 xorl %eax, %eax
2408 ret
2409
/* strncmp/strncasecmp short path: the length bound REM is small, so
   compare byte by byte, offsets 0..15.  Each step is the same
   pattern: load the byte from %eax, (for strncasecmp) fold both
   bytes through the C-locale tolower table, compare with the byte
   from %edx, jump to L(neq_sncmp) on difference, L(eq_sncmp) on NUL
   or when REM is exhausted.  The CFI_PUSH lines restate the unwind
   state for registers still on the stack at this label.  */
2410#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2411 .p2align 4
2412# if defined USE_AS_STRNCASECMP_L && defined PIC
2413 CFI_PUSH (%ebx)
2414# endif
2415 CFI_PUSH (REM)
2416L(less16bytes_sncmp):
2417# ifdef USE_AS_STRNCASECMP_L
2418 PUSH (%esi)
2419# endif
2420 test REM, REM
2421 jz L(eq_sncmp)
2422
/* Offset 0.  */
2423 movzbl (%eax), %ecx
2424# ifdef USE_AS_STRNCASECMP_L
2425 movzbl (%edx), %esi
2426# ifdef PIC
2427 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2428 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2429# else
2430 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2431 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2432# endif
2433 cmpl %ecx, %esi
2434# else
2435 cmpb %cl, (%edx)
2436# endif
2437 jne L(neq_sncmp)
2438 test %cl, %cl
2439 je L(eq_sncmp)
2440
2441 cmp $1, REM
2442 je L(eq_sncmp)
2443
/* Offset 1.  */
2444 movzbl 1(%eax), %ecx
2445# ifdef USE_AS_STRNCASECMP_L
2446 movzbl 1(%edx), %esi
2447# ifdef PIC
2448 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2449 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2450# else
2451 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2452 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2453# endif
2454 cmpl %ecx, %esi
2455# else
2456 cmpb %cl, 1(%edx)
2457# endif
2458 jne L(neq_sncmp)
2459 test %cl, %cl
2460 je L(eq_sncmp)
2461
2462 cmp $2, REM
2463 je L(eq_sncmp)
2464
/* Offset 2.  */
2465 movzbl 2(%eax), %ecx
2466# ifdef USE_AS_STRNCASECMP_L
2467 movzbl 2(%edx), %esi
2468# ifdef PIC
2469 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2470 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2471# else
2472 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2473 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2474# endif
2475 cmpl %ecx, %esi
2476# else
2477 cmpb %cl, 2(%edx)
2478# endif
2479 jne L(neq_sncmp)
2480 test %cl, %cl
2481 je L(eq_sncmp)
2482
2483 cmp $3, REM
2484 je L(eq_sncmp)
2485
/* Offset 3.  */
2486 movzbl 3(%eax), %ecx
2487# ifdef USE_AS_STRNCASECMP_L
2488 movzbl 3(%edx), %esi
2489# ifdef PIC
2490 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2491 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2492# else
2493 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2494 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2495# endif
2496 cmpl %ecx, %esi
2497# else
2498 cmpb %cl, 3(%edx)
2499# endif
2500 jne L(neq_sncmp)
2501 test %cl, %cl
2502 je L(eq_sncmp)
2503
2504 cmp $4, REM
2505 je L(eq_sncmp)
2506
/* Offset 4.  */
2507 movzbl 4(%eax), %ecx
2508# ifdef USE_AS_STRNCASECMP_L
2509 movzbl 4(%edx), %esi
2510# ifdef PIC
2511 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2512 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2513# else
2514 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2515 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2516# endif
2517 cmpl %ecx, %esi
2518# else
2519 cmpb %cl, 4(%edx)
2520# endif
2521 jne L(neq_sncmp)
2522 test %cl, %cl
2523 je L(eq_sncmp)
2524
2525 cmp $5, REM
2526 je L(eq_sncmp)
2527
/* Offset 5.  */
2528 movzbl 5(%eax), %ecx
2529# ifdef USE_AS_STRNCASECMP_L
2530 movzbl 5(%edx), %esi
2531# ifdef PIC
2532 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2533 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2534# else
2535 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2536 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2537# endif
2538 cmpl %ecx, %esi
2539# else
2540 cmpb %cl, 5(%edx)
2541# endif
2542 jne L(neq_sncmp)
2543 test %cl, %cl
2544 je L(eq_sncmp)
2545
2546 cmp $6, REM
2547 je L(eq_sncmp)
2548
/* Offset 6.  */
2549 movzbl 6(%eax), %ecx
2550# ifdef USE_AS_STRNCASECMP_L
2551 movzbl 6(%edx), %esi
2552# ifdef PIC
2553 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2554 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2555# else
2556 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2557 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2558# endif
2559 cmpl %ecx, %esi
2560# else
2561 cmpb %cl, 6(%edx)
2562# endif
2563 jne L(neq_sncmp)
2564 test %cl, %cl
2565 je L(eq_sncmp)
2566
2567 cmp $7, REM
2568 je L(eq_sncmp)
2569
/* Offset 7.  */
2570 movzbl 7(%eax), %ecx
2571# ifdef USE_AS_STRNCASECMP_L
2572 movzbl 7(%edx), %esi
2573# ifdef PIC
2574 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2575 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2576# else
2577 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2578 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2579# endif
2580 cmpl %ecx, %esi
2581# else
2582 cmpb %cl, 7(%edx)
2583# endif
2584 jne L(neq_sncmp)
2585 test %cl, %cl
2586 je L(eq_sncmp)
2587
2588
2589 cmp $8, REM
2590 je L(eq_sncmp)
2591
/* Offset 8.  */
2592 movzbl 8(%eax), %ecx
2593# ifdef USE_AS_STRNCASECMP_L
2594 movzbl 8(%edx), %esi
2595# ifdef PIC
2596 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2597 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2598# else
2599 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2600 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2601# endif
2602 cmpl %ecx, %esi
2603# else
2604 cmpb %cl, 8(%edx)
2605# endif
2606 jne L(neq_sncmp)
2607 test %cl, %cl
2608 je L(eq_sncmp)
2609
2610 cmp $9, REM
2611 je L(eq_sncmp)
2612
/* Offset 9.  */
2613 movzbl 9(%eax), %ecx
2614# ifdef USE_AS_STRNCASECMP_L
2615 movzbl 9(%edx), %esi
2616# ifdef PIC
2617 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2618 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2619# else
2620 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2621 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2622# endif
2623 cmpl %ecx, %esi
2624# else
2625 cmpb %cl, 9(%edx)
2626# endif
2627 jne L(neq_sncmp)
2628 test %cl, %cl
2629 je L(eq_sncmp)
2630
2631 cmp $10, REM
2632 je L(eq_sncmp)
2633
/* Offset 10.  */
2634 movzbl 10(%eax), %ecx
2635# ifdef USE_AS_STRNCASECMP_L
2636 movzbl 10(%edx), %esi
2637# ifdef PIC
2638 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2639 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2640# else
2641 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2642 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2643# endif
2644 cmpl %ecx, %esi
2645# else
2646 cmpb %cl, 10(%edx)
2647# endif
2648 jne L(neq_sncmp)
2649 test %cl, %cl
2650 je L(eq_sncmp)
2651
2652 cmp $11, REM
2653 je L(eq_sncmp)
2654
/* Offset 11.  */
2655 movzbl 11(%eax), %ecx
2656# ifdef USE_AS_STRNCASECMP_L
2657 movzbl 11(%edx), %esi
2658# ifdef PIC
2659 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2660 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2661# else
2662 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2663 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2664# endif
2665 cmpl %ecx, %esi
2666# else
2667 cmpb %cl, 11(%edx)
2668# endif
2669 jne L(neq_sncmp)
2670 test %cl, %cl
2671 je L(eq_sncmp)
2672
2673
2674 cmp $12, REM
2675 je L(eq_sncmp)
2676
/* Offset 12.  */
2677 movzbl 12(%eax), %ecx
2678# ifdef USE_AS_STRNCASECMP_L
2679 movzbl 12(%edx), %esi
2680# ifdef PIC
2681 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2682 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2683# else
2684 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2685 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2686# endif
2687 cmpl %ecx, %esi
2688# else
2689 cmpb %cl, 12(%edx)
2690# endif
2691 jne L(neq_sncmp)
2692 test %cl, %cl
2693 je L(eq_sncmp)
2694
2695 cmp $13, REM
2696 je L(eq_sncmp)
2697
/* Offset 13.  */
2698 movzbl 13(%eax), %ecx
2699# ifdef USE_AS_STRNCASECMP_L
2700 movzbl 13(%edx), %esi
2701# ifdef PIC
2702 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2703 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2704# else
2705 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2706 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2707# endif
2708 cmpl %ecx, %esi
2709# else
2710 cmpb %cl, 13(%edx)
2711# endif
2712 jne L(neq_sncmp)
2713 test %cl, %cl
2714 je L(eq_sncmp)
2715
2716 cmp $14, REM
2717 je L(eq_sncmp)
2718
/* Offset 14.  */
2719 movzbl 14(%eax), %ecx
2720# ifdef USE_AS_STRNCASECMP_L
2721 movzbl 14(%edx), %esi
2722# ifdef PIC
2723 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2724 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2725# else
2726 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2727 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2728# endif
2729 cmpl %ecx, %esi
2730# else
2731 cmpb %cl, 14(%edx)
2732# endif
2733 jne L(neq_sncmp)
2734 test %cl, %cl
2735 je L(eq_sncmp)
2736
2737 cmp $15, REM
2738 je L(eq_sncmp)
2739
/* Offset 15 — the last byte this path can reach.  */
2740 movzbl 15(%eax), %ecx
2741# ifdef USE_AS_STRNCASECMP_L
2742 movzbl 15(%edx), %esi
2743# ifdef PIC
2744 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
2745 movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
2746# else
2747 movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
2748 movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
2749# endif
2750 cmpl %ecx, %esi
2751# else
2752 cmpb %cl, 15(%edx)
2753# endif
2754 jne L(neq_sncmp)
2755
/* Equal within the length bound: return 0.  strncasecmp has its own
   local eq/neq labels because %esi was pushed on this path.  */
2756# ifdef USE_AS_STRNCASECMP_L
2757L(eq_sncmp):
2758 POP (%esi)
2759# endif
2760 POP (REM)
2761# if defined USE_AS_STRNCASECMP_L && defined PIC
2762 POP (%ebx)
2763# endif
2764 xor %eax, %eax
2765 ret
2766
/* strncasecmp unequal return: cmovna turns +1 into -1 when the last
   cmpl above reported "not above".  */
2767# ifdef USE_AS_STRNCASECMP_L
2768 .p2align 4
2769# ifdef PIC
2770 CFI_PUSH (%ebx)
2771# endif
2772 CFI_PUSH (REM)
2773 CFI_PUSH (%esi)
2774L(neq_sncmp):
2775 mov $1, %eax
2776 mov $-1, %edx
2777 cmovna %edx, %eax
2778 POP (%esi)
2779 POP (REM)
2780# ifdef PIC
2781 POP (%ebx)
2782# endif
2783 ret
2784# endif
2785#endif
2786
2787END (STRCMP)
2788
2789#endif
2790

source code of glibc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S