/* wcscmp with SSE2
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

/* Argument offsets relative to %esp at function entry, i.e. before
   ENTRANCE has pushed anything.  PARMS skips the 4-byte return address.  */
# define PARMS	4
# define STR1	PARMS
# define STR2	STR1+4

/* Unwind annotations that accompany a 4-byte push or pop of REG.  */
# define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

/* Push or pop REG and record the matching unwind information.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* ENTRANCE saves the two registers used as string cursors.  RETURN
   restores them and returns; the CFI_PUSH pair after the `ret' puts the
   unwind state back to "both registers saved" for whatever code follows
   the expansion in the file.  */
# define ENTRANCE	PUSH (%esi); PUSH (%edi)
# define RETURN		POP (%edi); POP (%esi); ret; CFI_PUSH (%esi); CFI_PUSH (%edi);

/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp
   function.  */

/* Helper macros for __wcscmp_sse2.  After the prologue %edi walks the
   first string and %esi the second one.  */

/* Compare one wide character.  S2 is the operand in the second string,
   S1 the matching operand in the first.  Jumps to NE, with the flags of
   (first - second) still live, when they differ, and to EQ when both
   are the terminating null.  Clobbers %ecx.  */
# define CMP_WCHAR(S2, S1, NE, EQ) \
	movl	S2, %ecx; \
	cmpl	%ecx, S1; \
	jne	NE; \
	testl	%ecx, %ecx; \
	jz	EQ

/* CMP_WCHAR before %esi/%edi have been pushed (exits use a bare ret).  */
# define EARLY_WCHAR(S2, S1)	CMP_WCHAR (S2, S1, L(neq), L(eq))

/* CMP_WCHAR once %esi/%edi have been pushed (exits go through RETURN).  */
# define LOOP_WCHAR(S2, S1)	CMP_WCHAR (S2, S1, L(nequal), L(equal))

/* Equality-only comparison through %eax, with no null test.  S1 is the
   first-string operand and S2 the second-string operand, so the flags
   again describe (first - second).  */
# define CMP_WCHAR_NONULL(S1, S2) \
	movl	S1, %eax; \
	cmpl	S2, %eax; \
	jne	L(nequal)

/* Final comparison of a tail: either the wide characters differ, or
   the function returns with %eax exactly as the caller of the macro
   left it.  Clobbers %ecx.  */
# define LAST_WCHAR(S2, S1) \
	movl	S2, %ecx; \
	cmpl	%ecx, S1; \
	jne	L(nequal); \
	RETURN

/* Compare 16 bytes using two unaligned loads: X1 <- S1 (first string),
   X2 <- S2 (second string).  %xmm0 must be all zero on entry.  %xmm0
   becomes the null mask of X1 and X1 the equality mask; after the psubb
   a byte of X1 has its top bit set only where the wide characters are
   equal and not null.  The byte mask is therefore 0xffff exactly when
   all four wide characters match and none is the terminator, in which
   case %xmm0 is still zero and execution falls through.  Otherwise
   control goes to FAIL with %edx = mask - 0xffff.  */
# define SSE_CMP_UU(S1, S2, X1, X2, FAIL) \
	movdqu	S1, X1; \
	movdqu	S2, X2; \
	pcmpeqd	X1, %xmm0; \
	pcmpeqd	X2, X1; \
	psubb	%xmm0, X1; \
	pmovmskb X1, %edx; \
	subl	$0xffff, %edx; \
	jnz	FAIL

/* Same check with a single load: X is fetched from SRC with LOAD
   (movdqu or movdqa) and compared against MEM, which is used directly as
   a memory operand of pcmpeqd and so has to be 16-byte aligned.  The
   null test is made on the loaded string.  */
# define SSE_CMP_M(LOAD, SRC, MEM, X, FAIL) \
	LOAD	SRC, X; \
	pcmpeqd	X, %xmm0; \
	pcmpeqd	MEM, X; \
	psubb	%xmm0, X; \
	pmovmskb X, %edx; \
	subl	$0xffff, %edx; \
	jnz	FAIL

	.text
ENTRY (__wcscmp_sse2)
/* Compare the wide strings whose addresses are at STR1(%esp) and
   STR2(%esp).  Returns 1, -1 or 0 in %eax, using a signed comparison of
   the first differing wide characters.  Up to 16 bytes are compared at
   a time with SSE2.  %esi and %edi are saved and restored; %ecx, %edx,
   the flags and %xmm0-%xmm5 are used as scratch.  */
	movl	STR1(%esp), %edx
	movl	STR2(%esp), %eax

	/* The first four wide characters are handled without touching the
	   stack.  */
	EARLY_WCHAR ((%eax), (%edx))
	EARLY_WCHAR (4(%eax), 4(%edx))
	EARLY_WCHAR (8(%eax), 8(%edx))
	EARLY_WCHAR (12(%eax), 12(%edx))

	ENTRANCE
	addl	$16, %eax
	addl	$16, %edx

	movl	%eax, %esi		/* %esi = cursor in second string */
	movl	%edx, %edi		/* %edi = cursor in first string */
	pxor	%xmm0, %xmm0		/* zero, used for the null tests */
	movb	%al, %ch
	movb	%dl, %cl
	andl	$63, %eax		/* %esi offset within a 64-byte line */
	andl	$63, %edx		/* %edi offset within a 64-byte line */

	/* Dispatch on the 16-byte bucket (0, 16, 32 or 48) of each cursor's
	   offset, "00" meaning the cursor is 16-byte aligned.  In
	   continue_A_B the first part describes %edi and the second %esi,
	   except that mirrored pairs share one label.  Each loop uses
	   scalar compares for every 16-byte group in which one cursor sits
	   in bucket 48 -- presumably so that no 16-byte load straddles a
	   64-byte line (and hence a page) boundary.  */
	andb	$15, %cl
	jz	L(continue_00)
	cmpl	$16, %edx
	jb	L(continue_0)
	cmpl	$32, %edx
	jb	L(continue_16)
	cmpl	$48, %edx
	jb	L(continue_32)

L(continue_48):
	andb	$15, %ch
	jz	L(continue_48_00)
	cmpl	$16, %eax
	jb	L(continue_0_48)
	cmpl	$32, %eax
	jb	L(continue_16_48)
	cmpl	$48, %eax
	jb	L(continue_32_48)

	.p2align 4
L(continue_48_48):
	LOOP_WCHAR ((%esi), (%edi))
	LOOP_WCHAR (4(%esi), 4(%edi))
	LOOP_WCHAR (8(%esi), 8(%edi))
	LOOP_WCHAR (12(%esi), 12(%edi))

	SSE_CMP_UU (16(%edi), 16(%esi), %xmm1, %xmm2, L(less4_double_words_16))
	SSE_CMP_UU (32(%edi), 32(%esi), %xmm1, %xmm2, L(less4_double_words_32))
	SSE_CMP_UU (48(%edi), 48(%esi), %xmm1, %xmm2, L(less4_double_words_48))

	addl	$64, %esi
	addl	$64, %edi
	jmp	L(continue_48_48)

L(continue_0):
	andb	$15, %ch
	jz	L(continue_0_00)
	cmpl	$16, %eax
	jb	L(continue_0_0)
	cmpl	$32, %eax
	jb	L(continue_0_16)
	cmpl	$48, %eax
	jb	L(continue_0_32)

	.p2align 4
L(continue_0_48):
	LOOP_WCHAR ((%esi), (%edi))
	LOOP_WCHAR (4(%esi), 4(%edi))
	LOOP_WCHAR (8(%esi), 8(%edi))
	LOOP_WCHAR (12(%esi), 12(%edi))

	SSE_CMP_UU (16(%edi), 16(%esi), %xmm1, %xmm2, L(less4_double_words_16))
	SSE_CMP_UU (32(%edi), 32(%esi), %xmm1, %xmm2, L(less4_double_words_32))

	LOOP_WCHAR (48(%esi), 48(%edi))
	LOOP_WCHAR (52(%esi), 52(%edi))
	LOOP_WCHAR (56(%esi), 56(%edi))
	LOOP_WCHAR (60(%esi), 60(%edi))

	addl	$64, %esi
	addl	$64, %edi
	jmp	L(continue_0_48)

	.p2align 4
L(continue_00):
	andb	$15, %ch
	jz	L(continue_00_00)
	cmpl	$16, %eax
	jb	L(continue_00_0)
	cmpl	$32, %eax
	jb	L(continue_00_16)
	cmpl	$48, %eax
	jb	L(continue_00_32)

	.p2align 4
L(continue_00_48):
	/* %edi is aligned: look for a null in its 16 bytes first.  If
	   there is none, %xmm0 stays zero and plain equality tests are
	   enough for this group.  */
	pcmpeqd	(%edi), %xmm0
	movl	(%edi), %eax
	pmovmskb %xmm0, %ecx
	testl	%ecx, %ecx
	jnz	L(less4_double_words1)

	cmpl	(%esi), %eax
	jne	L(nequal)

	CMP_WCHAR_NONULL (4(%edi), 4(%esi))
	CMP_WCHAR_NONULL (8(%edi), 8(%esi))
	CMP_WCHAR_NONULL (12(%edi), 12(%esi))

	SSE_CMP_M (movdqu, 16(%esi), 16(%edi), %xmm2, L(less4_double_words_16))
	SSE_CMP_M (movdqu, 32(%esi), 32(%edi), %xmm2, L(less4_double_words_32))
	SSE_CMP_M (movdqu, 48(%esi), 48(%edi), %xmm2, L(less4_double_words_48))

	addl	$64, %esi
	addl	$64, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_32):
	andb	$15, %ch
	jz	L(continue_32_00)
	cmpl	$16, %eax
	jb	L(continue_0_32)
	cmpl	$32, %eax
	jb	L(continue_16_32)
	cmpl	$48, %eax
	jb	L(continue_32_32)

	.p2align 4
L(continue_32_48):
	LOOP_WCHAR ((%esi), (%edi))
	LOOP_WCHAR (4(%esi), 4(%edi))
	LOOP_WCHAR (8(%esi), 8(%edi))
	LOOP_WCHAR (12(%esi), 12(%edi))

	LOOP_WCHAR (16(%esi), 16(%edi))
	LOOP_WCHAR (20(%esi), 20(%edi))
	LOOP_WCHAR (24(%esi), 24(%edi))
	LOOP_WCHAR (28(%esi), 28(%edi))

	SSE_CMP_UU (32(%edi), 32(%esi), %xmm1, %xmm2, L(less4_double_words_32))
	SSE_CMP_UU (48(%edi), 48(%esi), %xmm1, %xmm2, L(less4_double_words_48))

	addl	$64, %esi
	addl	$64, %edi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_16):
	andb	$15, %ch
	jz	L(continue_16_00)
	cmpl	$16, %eax
	jb	L(continue_0_16)
	cmpl	$32, %eax
	jb	L(continue_16_16)
	cmpl	$48, %eax
	jb	L(continue_16_32)

	.p2align 4
L(continue_16_48):
	LOOP_WCHAR ((%esi), (%edi))
	LOOP_WCHAR (4(%esi), 4(%edi))
	LOOP_WCHAR (8(%esi), 8(%edi))
	LOOP_WCHAR (12(%esi), 12(%edi))

	SSE_CMP_UU (16(%edi), 16(%esi), %xmm1, %xmm2, L(less4_double_words_16))

	LOOP_WCHAR (32(%esi), 32(%edi))
	LOOP_WCHAR (36(%esi), 36(%edi))
	LOOP_WCHAR (40(%esi), 40(%edi))
	LOOP_WCHAR (44(%esi), 44(%edi))

	SSE_CMP_UU (48(%edi), 48(%esi), %xmm1, %xmm2, L(less4_double_words_48))

	addl	$64, %esi
	addl	$64, %edi
	jmp	L(continue_16_48)

	/* Both cursors 16-byte aligned: aligned accesses throughout.  */
	.p2align 4
L(continue_00_00):
	SSE_CMP_M (movdqa, (%edi), (%esi), %xmm1, L(less4_double_words))
	SSE_CMP_M (movdqa, 16(%edi), 16(%esi), %xmm3, L(less4_double_words_16))
	SSE_CMP_M (movdqa, 32(%edi), 32(%esi), %xmm5, L(less4_double_words_32))
	SSE_CMP_M (movdqa, 48(%edi), 48(%esi), %xmm1, L(less4_double_words_48))

	addl	$64, %esi
	addl	$64, %edi
	jmp	L(continue_00_00)

	/* %edi aligned, %esi in a lower bucket: step with SSE until %esi
	   reaches bucket 48, then join continue_00_48.  */
	.p2align 4
L(continue_00_32):
	SSE_CMP_M (movdqu, (%esi), (%edi), %xmm2, L(less4_double_words))

	addl	$16, %esi
	addl	$16, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_16):
	SSE_CMP_M (movdqu, (%esi), (%edi), %xmm2, L(less4_double_words))
	SSE_CMP_M (movdqu, 16(%esi), 16(%edi), %xmm2, L(less4_double_words_16))

	addl	$32, %esi
	addl	$32, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_0):
	SSE_CMP_M (movdqu, (%esi), (%edi), %xmm2, L(less4_double_words))
	SSE_CMP_M (movdqu, 16(%esi), 16(%edi), %xmm2, L(less4_double_words_16))
	SSE_CMP_M (movdqu, 32(%esi), 32(%edi), %xmm2, L(less4_double_words_32))

	addl	$48, %esi
	addl	$48, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_48_00):
	/* Mirror image of continue_00_48: here %esi is the aligned cursor
	   whose 16 bytes are searched for a null.  */
	pcmpeqd	(%esi), %xmm0
	movl	(%edi), %eax
	pmovmskb %xmm0, %ecx
	testl	%ecx, %ecx
	jnz	L(less4_double_words1)

	cmpl	(%esi), %eax
	jne	L(nequal)

	CMP_WCHAR_NONULL (4(%edi), 4(%esi))
	CMP_WCHAR_NONULL (8(%edi), 8(%esi))
	CMP_WCHAR_NONULL (12(%edi), 12(%esi))

	SSE_CMP_M (movdqu, 16(%edi), 16(%esi), %xmm1, L(less4_double_words_16))
	SSE_CMP_M (movdqu, 32(%edi), 32(%esi), %xmm1, L(less4_double_words_32))
	SSE_CMP_M (movdqu, 48(%edi), 48(%esi), %xmm1, L(less4_double_words_48))

	addl	$64, %esi
	addl	$64, %edi
	jmp	L(continue_48_00)

	/* %esi aligned, %edi in a lower bucket: step with SSE until %edi
	   reaches bucket 48, then join continue_48_00.  */
	.p2align 4
L(continue_32_00):
	SSE_CMP_M (movdqu, (%edi), (%esi), %xmm1, L(less4_double_words))

	addl	$16, %esi
	addl	$16, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_16_00):
	SSE_CMP_M (movdqu, (%edi), (%esi), %xmm1, L(less4_double_words))
	SSE_CMP_M (movdqu, 16(%edi), 16(%esi), %xmm1, L(less4_double_words_16))

	addl	$32, %esi
	addl	$32, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_0_00):
	SSE_CMP_M (movdqu, (%edi), (%esi), %xmm1, L(less4_double_words))
	SSE_CMP_M (movdqu, 16(%edi), 16(%esi), %xmm1, L(less4_double_words_16))
	SSE_CMP_M (movdqu, 32(%edi), 32(%esi), %xmm1, L(less4_double_words_32))

	addl	$48, %esi
	addl	$48, %edi
	jmp	L(continue_48_00)

	/* Neither cursor aligned and neither in bucket 48 yet: step with
	   SSE until the higher one gets there, then join the matching
	   steady-state loop.  */
	.p2align 4
L(continue_32_32):
	SSE_CMP_UU ((%edi), (%esi), %xmm1, %xmm2, L(less4_double_words))

	addl	$16, %esi
	addl	$16, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_16_16):
	SSE_CMP_UU ((%edi), (%esi), %xmm1, %xmm2, L(less4_double_words))
	SSE_CMP_UU (16(%edi), 16(%esi), %xmm3, %xmm4, L(less4_double_words_16))

	addl	$32, %esi
	addl	$32, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_0):
	SSE_CMP_UU ((%edi), (%esi), %xmm1, %xmm2, L(less4_double_words))
	SSE_CMP_UU (16(%edi), 16(%esi), %xmm3, %xmm4, L(less4_double_words_16))
	SSE_CMP_UU (32(%edi), 32(%esi), %xmm1, %xmm2, L(less4_double_words_32))

	addl	$48, %esi
	addl	$48, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_16):
	SSE_CMP_UU ((%edi), (%esi), %xmm1, %xmm2, L(less4_double_words))
	SSE_CMP_UU (16(%edi), 16(%esi), %xmm1, %xmm2, L(less4_double_words_16))

	addl	$32, %esi
	addl	$32, %edi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_0_32):
	SSE_CMP_UU ((%edi), (%esi), %xmm1, %xmm2, L(less4_double_words))

	addl	$16, %esi
	addl	$16, %edi
	jmp	L(continue_16_48)

	.p2align 4
L(continue_16_32):
	SSE_CMP_UU ((%edi), (%esi), %xmm1, %xmm2, L(less4_double_words))

	addl	$16, %esi
	addl	$16, %edi
	jmp	L(continue_32_48)

	/* Reached from continue_00_48 / continue_48_00 when the aligned
	   string has a null somewhere in the current 16 bytes.  %eax holds
	   the first string's wide character at (%edi).  */
	.p2align 4
L(less4_double_words1):
	cmpl	(%esi), %eax
	jne	L(nequal)
	testl	%eax, %eax
	jz	L(equal)

	LOOP_WCHAR (4(%esi), 4(%edi))
	LOOP_WCHAR (8(%esi), 8(%edi))

	/* Three equal non-null wide characters so far; the fourth either
	   differs or ends the comparison with an equal result.  */
	movl	12(%esi), %ecx
	cmpl	%ecx, 12(%edi)
	jne	L(nequal)
	xorl	%eax, %eax
	RETURN

	/* Tails for a failed SSE check at offset 0, 16, 32 or 48.  %edx
	   is mask - 0xffff, so its low 16 bits are mask + 1: %dl is zero
	   when the first two wide characters passed, and the low nibble
	   of %dl (or %dh) is zero when the first (or third) one passed.
	   The selected wide character is compared once more; if it is
	   equal the strings ended there and 0 is returned.  */
	.p2align 4
L(less4_double_words):
	xorl	%eax, %eax
	testb	%dl, %dl
	jz	L(next_two_double_words)
	andb	$15, %dl
	jz	L(second_double_word)
	LAST_WCHAR ((%esi), (%edi))

	.p2align 4
L(second_double_word):
	LAST_WCHAR (4(%esi), 4(%edi))

	.p2align 4
L(next_two_double_words):
	andb	$15, %dh
	jz	L(fourth_double_word)
	LAST_WCHAR (8(%esi), 8(%edi))

	.p2align 4
L(fourth_double_word):
	LAST_WCHAR (12(%esi), 12(%edi))

	.p2align 4
L(less4_double_words_16):
	xorl	%eax, %eax
	testb	%dl, %dl
	jz	L(next_two_double_words_16)
	andb	$15, %dl
	jz	L(second_double_word_16)
	LAST_WCHAR (16(%esi), 16(%edi))

	.p2align 4
L(second_double_word_16):
	LAST_WCHAR (20(%esi), 20(%edi))

	.p2align 4
L(next_two_double_words_16):
	andb	$15, %dh
	jz	L(fourth_double_word_16)
	LAST_WCHAR (24(%esi), 24(%edi))

	.p2align 4
L(fourth_double_word_16):
	LAST_WCHAR (28(%esi), 28(%edi))

	.p2align 4
L(less4_double_words_32):
	xorl	%eax, %eax
	testb	%dl, %dl
	jz	L(next_two_double_words_32)
	andb	$15, %dl
	jz	L(second_double_word_32)
	LAST_WCHAR (32(%esi), 32(%edi))

	.p2align 4
L(second_double_word_32):
	LAST_WCHAR (36(%esi), 36(%edi))

	.p2align 4
L(next_two_double_words_32):
	andb	$15, %dh
	jz	L(fourth_double_word_32)
	LAST_WCHAR (40(%esi), 40(%edi))

	.p2align 4
L(fourth_double_word_32):
	LAST_WCHAR (44(%esi), 44(%edi))

	.p2align 4
L(less4_double_words_48):
	xorl	%eax, %eax
	testb	%dl, %dl
	jz	L(next_two_double_words_48)
	andb	$15, %dl
	jz	L(second_double_word_48)
	LAST_WCHAR (48(%esi), 48(%edi))

	.p2align 4
L(second_double_word_48):
	LAST_WCHAR (52(%esi), 52(%edi))

	.p2align 4
L(next_two_double_words_48):
	andb	$15, %dh
	jz	L(fourth_double_word_48)
	LAST_WCHAR (56(%esi), 56(%edi))

	.p2align 4
L(fourth_double_word_48):
	LAST_WCHAR (60(%esi), 60(%edi))

	/* Mismatch with %esi/%edi pushed.  The flags still come from the
	   compare (first - second); movl leaves them untouched.  */
	.p2align 4
L(nequal):
	movl	$1, %eax
	jg	L(return)		/* signed: first > second */
	negl	%eax
	RETURN

	.p2align 4
L(return):
	RETURN

	.p2align 4
L(equal):
	xorl	%eax, %eax
	RETURN

	/* The exits below are reached only from the code before ENTRANCE,
	   so describe the frame without the two saved registers.  */
	CFI_POP (%edi)
	CFI_POP (%esi)

	.p2align 4
L(neq):
	movl	$1, %eax
	jg	L(neq_bigger)		/* signed: first > second */
	negl	%eax

L(neq_bigger):
	ret

	.p2align 4
L(eq):
	xorl	%eax, %eax
	ret

END (__wcscmp_sse2)
#endif


/* Source: glibc/sysdeps/i386/i686/multiarch/wcscmp-sse2.S  */