/* Optimized wcscmp for x86-64 with SSE2.
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp
   function.  */
22
23 .text
24ENTRY (__wcscmp)
25/*
26 * This implementation uses SSE to compare up to 16 bytes at a time.
27*/
28 mov %esi, %eax
29 mov %edi, %edx
30 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
31 mov %al, %ch
32 mov %dl, %cl
33 and $63, %eax /* rsi alignment in cache line */
34 and $63, %edx /* rdi alignment in cache line */
35 and $15, %cl
36 jz L(continue_00)
37 cmp $16, %edx
38 jb L(continue_0)
39 cmp $32, %edx
40 jb L(continue_16)
41 cmp $48, %edx
42 jb L(continue_32)
43
44L(continue_48):
45 and $15, %ch
46 jz L(continue_48_00)
47 cmp $16, %eax
48 jb L(continue_0_48)
49 cmp $32, %eax
50 jb L(continue_16_48)
51 cmp $48, %eax
52 jb L(continue_32_48)
53
54 .p2align 4
55L(continue_48_48):
56 mov (%rsi), %ecx
57 cmp %ecx, (%rdi)
58 jne L(nequal)
59 test %ecx, %ecx
60 jz L(equal)
61
62 mov 4(%rsi), %ecx
63 cmp %ecx, 4(%rdi)
64 jne L(nequal)
65 test %ecx, %ecx
66 jz L(equal)
67
68 mov 8(%rsi), %ecx
69 cmp %ecx, 8(%rdi)
70 jne L(nequal)
71 test %ecx, %ecx
72 jz L(equal)
73
74 mov 12(%rsi), %ecx
75 cmp %ecx, 12(%rdi)
76 jne L(nequal)
77 test %ecx, %ecx
78 jz L(equal)
79
80 movdqu 16(%rdi), %xmm1
81 movdqu 16(%rsi), %xmm2
82 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
83 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
84 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
85 pmovmskb %xmm1, %edx
86 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
87 jnz L(less4_double_words_16)
88
89 movdqu 32(%rdi), %xmm1
90 movdqu 32(%rsi), %xmm2
91 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
92 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
93 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
94 pmovmskb %xmm1, %edx
95 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
96 jnz L(less4_double_words_32)
97
98 movdqu 48(%rdi), %xmm1
99 movdqu 48(%rsi), %xmm2
100 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
101 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
102 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
103 pmovmskb %xmm1, %edx
104 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
105 jnz L(less4_double_words_48)
106
107 add $64, %rsi
108 add $64, %rdi
109 jmp L(continue_48_48)
110
111L(continue_0):
112 and $15, %ch
113 jz L(continue_0_00)
114 cmp $16, %eax
115 jb L(continue_0_0)
116 cmp $32, %eax
117 jb L(continue_0_16)
118 cmp $48, %eax
119 jb L(continue_0_32)
120
121 .p2align 4
122L(continue_0_48):
123 mov (%rsi), %ecx
124 cmp %ecx, (%rdi)
125 jne L(nequal)
126 test %ecx, %ecx
127 jz L(equal)
128
129 mov 4(%rsi), %ecx
130 cmp %ecx, 4(%rdi)
131 jne L(nequal)
132 test %ecx, %ecx
133 jz L(equal)
134
135 mov 8(%rsi), %ecx
136 cmp %ecx, 8(%rdi)
137 jne L(nequal)
138 test %ecx, %ecx
139 jz L(equal)
140
141 mov 12(%rsi), %ecx
142 cmp %ecx, 12(%rdi)
143 jne L(nequal)
144 test %ecx, %ecx
145 jz L(equal)
146
147 movdqu 16(%rdi), %xmm1
148 movdqu 16(%rsi), %xmm2
149 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
150 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
151 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
152 pmovmskb %xmm1, %edx
153 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
154 jnz L(less4_double_words_16)
155
156 movdqu 32(%rdi), %xmm1
157 movdqu 32(%rsi), %xmm2
158 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
159 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
160 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
161 pmovmskb %xmm1, %edx
162 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
163 jnz L(less4_double_words_32)
164
165 mov 48(%rsi), %ecx
166 cmp %ecx, 48(%rdi)
167 jne L(nequal)
168 test %ecx, %ecx
169 jz L(equal)
170
171 mov 52(%rsi), %ecx
172 cmp %ecx, 52(%rdi)
173 jne L(nequal)
174 test %ecx, %ecx
175 jz L(equal)
176
177 mov 56(%rsi), %ecx
178 cmp %ecx, 56(%rdi)
179 jne L(nequal)
180 test %ecx, %ecx
181 jz L(equal)
182
183 mov 60(%rsi), %ecx
184 cmp %ecx, 60(%rdi)
185 jne L(nequal)
186 test %ecx, %ecx
187 jz L(equal)
188
189 add $64, %rsi
190 add $64, %rdi
191 jmp L(continue_0_48)
192
193 .p2align 4
194L(continue_00):
195 and $15, %ch
196 jz L(continue_00_00)
197 cmp $16, %eax
198 jb L(continue_00_0)
199 cmp $32, %eax
200 jb L(continue_00_16)
201 cmp $48, %eax
202 jb L(continue_00_32)
203
204 .p2align 4
205L(continue_00_48):
206 pcmpeqd (%rdi), %xmm0
207 mov (%rdi), %eax
208 pmovmskb %xmm0, %ecx
209 test %ecx, %ecx
210 jnz L(less4_double_words1)
211
212 cmp (%rsi), %eax
213 jne L(nequal)
214
215 mov 4(%rdi), %eax
216 cmp 4(%rsi), %eax
217 jne L(nequal)
218
219 mov 8(%rdi), %eax
220 cmp 8(%rsi), %eax
221 jne L(nequal)
222
223 mov 12(%rdi), %eax
224 cmp 12(%rsi), %eax
225 jne L(nequal)
226
227 movdqu 16(%rsi), %xmm2
228 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
229 pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */
230 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
231 pmovmskb %xmm2, %edx
232 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
233 jnz L(less4_double_words_16)
234
235 movdqu 32(%rsi), %xmm2
236 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
237 pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */
238 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
239 pmovmskb %xmm2, %edx
240 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
241 jnz L(less4_double_words_32)
242
243 movdqu 48(%rsi), %xmm2
244 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
245 pcmpeqd 48(%rdi), %xmm2 /* compare first 4 double_words for equality */
246 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
247 pmovmskb %xmm2, %edx
248 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
249 jnz L(less4_double_words_48)
250
251 add $64, %rsi
252 add $64, %rdi
253 jmp L(continue_00_48)
254
255 .p2align 4
256L(continue_32):
257 and $15, %ch
258 jz L(continue_32_00)
259 cmp $16, %eax
260 jb L(continue_0_32)
261 cmp $32, %eax
262 jb L(continue_16_32)
263 cmp $48, %eax
264 jb L(continue_32_32)
265
266 .p2align 4
267L(continue_32_48):
268 mov (%rsi), %ecx
269 cmp %ecx, (%rdi)
270 jne L(nequal)
271 test %ecx, %ecx
272 jz L(equal)
273
274 mov 4(%rsi), %ecx
275 cmp %ecx, 4(%rdi)
276 jne L(nequal)
277 test %ecx, %ecx
278 jz L(equal)
279
280 mov 8(%rsi), %ecx
281 cmp %ecx, 8(%rdi)
282 jne L(nequal)
283 test %ecx, %ecx
284 jz L(equal)
285
286 mov 12(%rsi), %ecx
287 cmp %ecx, 12(%rdi)
288 jne L(nequal)
289 test %ecx, %ecx
290 jz L(equal)
291
292 mov 16(%rsi), %ecx
293 cmp %ecx, 16(%rdi)
294 jne L(nequal)
295 test %ecx, %ecx
296 jz L(equal)
297
298 mov 20(%rsi), %ecx
299 cmp %ecx, 20(%rdi)
300 jne L(nequal)
301 test %ecx, %ecx
302 jz L(equal)
303
304 mov 24(%rsi), %ecx
305 cmp %ecx, 24(%rdi)
306 jne L(nequal)
307 test %ecx, %ecx
308 jz L(equal)
309
310 mov 28(%rsi), %ecx
311 cmp %ecx, 28(%rdi)
312 jne L(nequal)
313 test %ecx, %ecx
314 jz L(equal)
315
316 movdqu 32(%rdi), %xmm1
317 movdqu 32(%rsi), %xmm2
318 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
319 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
320 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
321 pmovmskb %xmm1, %edx
322 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
323 jnz L(less4_double_words_32)
324
325 movdqu 48(%rdi), %xmm1
326 movdqu 48(%rsi), %xmm2
327 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
328 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
329 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
330 pmovmskb %xmm1, %edx
331 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
332 jnz L(less4_double_words_48)
333
334 add $64, %rsi
335 add $64, %rdi
336 jmp L(continue_32_48)
337
338 .p2align 4
339L(continue_16):
340 and $15, %ch
341 jz L(continue_16_00)
342 cmp $16, %eax
343 jb L(continue_0_16)
344 cmp $32, %eax
345 jb L(continue_16_16)
346 cmp $48, %eax
347 jb L(continue_16_32)
348
349 .p2align 4
350L(continue_16_48):
351 mov (%rsi), %ecx
352 cmp %ecx, (%rdi)
353 jne L(nequal)
354 test %ecx, %ecx
355 jz L(equal)
356
357 mov 4(%rsi), %ecx
358 cmp %ecx, 4(%rdi)
359 jne L(nequal)
360 test %ecx, %ecx
361 jz L(equal)
362
363 mov 8(%rsi), %ecx
364 cmp %ecx, 8(%rdi)
365 jne L(nequal)
366 test %ecx, %ecx
367 jz L(equal)
368
369 mov 12(%rsi), %ecx
370 cmp %ecx, 12(%rdi)
371 jne L(nequal)
372 test %ecx, %ecx
373 jz L(equal)
374
375 movdqu 16(%rdi), %xmm1
376 movdqu 16(%rsi), %xmm2
377 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
378 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
379 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
380 pmovmskb %xmm1, %edx
381 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
382 jnz L(less4_double_words_16)
383
384 mov 32(%rsi), %ecx
385 cmp %ecx, 32(%rdi)
386 jne L(nequal)
387 test %ecx, %ecx
388 jz L(equal)
389
390 mov 36(%rsi), %ecx
391 cmp %ecx, 36(%rdi)
392 jne L(nequal)
393 test %ecx, %ecx
394 jz L(equal)
395
396 mov 40(%rsi), %ecx
397 cmp %ecx, 40(%rdi)
398 jne L(nequal)
399 test %ecx, %ecx
400 jz L(equal)
401
402 mov 44(%rsi), %ecx
403 cmp %ecx, 44(%rdi)
404 jne L(nequal)
405 test %ecx, %ecx
406 jz L(equal)
407
408 movdqu 48(%rdi), %xmm1
409 movdqu 48(%rsi), %xmm2
410 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
411 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
412 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
413 pmovmskb %xmm1, %edx
414 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
415 jnz L(less4_double_words_48)
416
417 add $64, %rsi
418 add $64, %rdi
419 jmp L(continue_16_48)
420
421 .p2align 4
422L(continue_00_00):
423 movdqa (%rdi), %xmm1
424 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
425 pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
426 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
427 pmovmskb %xmm1, %edx
428 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
429 jnz L(less4_double_words)
430
431 movdqa 16(%rdi), %xmm3
432 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
433 pcmpeqd 16(%rsi), %xmm3 /* compare first 4 double_words for equality */
434 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
435 pmovmskb %xmm3, %edx
436 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
437 jnz L(less4_double_words_16)
438
439 movdqa 32(%rdi), %xmm5
440 pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
441 pcmpeqd 32(%rsi), %xmm5 /* compare first 4 double_words for equality */
442 psubb %xmm0, %xmm5 /* packed sub of comparison results*/
443 pmovmskb %xmm5, %edx
444 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
445 jnz L(less4_double_words_32)
446
447 movdqa 48(%rdi), %xmm1
448 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
449 pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */
450 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
451 pmovmskb %xmm1, %edx
452 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
453 jnz L(less4_double_words_48)
454
455 add $64, %rsi
456 add $64, %rdi
457 jmp L(continue_00_00)
458
459 .p2align 4
460L(continue_00_32):
461 movdqu (%rsi), %xmm2
462 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
463 pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */
464 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
465 pmovmskb %xmm2, %edx
466 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
467 jnz L(less4_double_words)
468
469 add $16, %rsi
470 add $16, %rdi
471 jmp L(continue_00_48)
472
473 .p2align 4
474L(continue_00_16):
475 movdqu (%rsi), %xmm2
476 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
477 pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */
478 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
479 pmovmskb %xmm2, %edx
480 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
481 jnz L(less4_double_words)
482
483 movdqu 16(%rsi), %xmm2
484 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
485 pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */
486 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
487 pmovmskb %xmm2, %edx
488 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
489 jnz L(less4_double_words_16)
490
491 add $32, %rsi
492 add $32, %rdi
493 jmp L(continue_00_48)
494
495 .p2align 4
496L(continue_00_0):
497 movdqu (%rsi), %xmm2
498 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
499 pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */
500 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
501 pmovmskb %xmm2, %edx
502 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
503 jnz L(less4_double_words)
504
505 movdqu 16(%rsi), %xmm2
506 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
507 pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */
508 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
509 pmovmskb %xmm2, %edx
510 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
511 jnz L(less4_double_words_16)
512
513 movdqu 32(%rsi), %xmm2
514 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
515 pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */
516 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
517 pmovmskb %xmm2, %edx
518 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
519 jnz L(less4_double_words_32)
520
521 add $48, %rsi
522 add $48, %rdi
523 jmp L(continue_00_48)
524
525 .p2align 4
526L(continue_48_00):
527 pcmpeqd (%rsi), %xmm0
528 mov (%rdi), %eax
529 pmovmskb %xmm0, %ecx
530 test %ecx, %ecx
531 jnz L(less4_double_words1)
532
533 cmp (%rsi), %eax
534 jne L(nequal)
535
536 mov 4(%rdi), %eax
537 cmp 4(%rsi), %eax
538 jne L(nequal)
539
540 mov 8(%rdi), %eax
541 cmp 8(%rsi), %eax
542 jne L(nequal)
543
544 mov 12(%rdi), %eax
545 cmp 12(%rsi), %eax
546 jne L(nequal)
547
548 movdqu 16(%rdi), %xmm1
549 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
550 pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */
551 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
552 pmovmskb %xmm1, %edx
553 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
554 jnz L(less4_double_words_16)
555
556 movdqu 32(%rdi), %xmm1
557 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
558 pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */
559 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
560 pmovmskb %xmm1, %edx
561 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
562 jnz L(less4_double_words_32)
563
564 movdqu 48(%rdi), %xmm1
565 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
566 pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */
567 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
568 pmovmskb %xmm1, %edx
569 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
570 jnz L(less4_double_words_48)
571
572 add $64, %rsi
573 add $64, %rdi
574 jmp L(continue_48_00)
575
576 .p2align 4
577L(continue_32_00):
578 movdqu (%rdi), %xmm1
579 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
580 pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
581 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
582 pmovmskb %xmm1, %edx
583 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
584 jnz L(less4_double_words)
585
586 add $16, %rsi
587 add $16, %rdi
588 jmp L(continue_48_00)
589
590 .p2align 4
591L(continue_16_00):
592 movdqu (%rdi), %xmm1
593 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
594 pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
595 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
596 pmovmskb %xmm1, %edx
597 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
598 jnz L(less4_double_words)
599
600 movdqu 16(%rdi), %xmm1
601 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
602 pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */
603 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
604 pmovmskb %xmm1, %edx
605 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
606 jnz L(less4_double_words_16)
607
608 add $32, %rsi
609 add $32, %rdi
610 jmp L(continue_48_00)
611
612 .p2align 4
613L(continue_0_00):
614 movdqu (%rdi), %xmm1
615 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
616 pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
617 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
618 pmovmskb %xmm1, %edx
619 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
620 jnz L(less4_double_words)
621
622 movdqu 16(%rdi), %xmm1
623 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
624 pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */
625 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
626 pmovmskb %xmm1, %edx
627 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
628 jnz L(less4_double_words_16)
629
630 movdqu 32(%rdi), %xmm1
631 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
632 pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */
633 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
634 pmovmskb %xmm1, %edx
635 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
636 jnz L(less4_double_words_32)
637
638 add $48, %rsi
639 add $48, %rdi
640 jmp L(continue_48_00)
641
642 .p2align 4
643L(continue_32_32):
644 movdqu (%rdi), %xmm1
645 movdqu (%rsi), %xmm2
646 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
647 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
648 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
649 pmovmskb %xmm1, %edx
650 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
651 jnz L(less4_double_words)
652
653 add $16, %rsi
654 add $16, %rdi
655 jmp L(continue_48_48)
656
657 .p2align 4
658L(continue_16_16):
659 movdqu (%rdi), %xmm1
660 movdqu (%rsi), %xmm2
661 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
662 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
663 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
664 pmovmskb %xmm1, %edx
665 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
666 jnz L(less4_double_words)
667
668 movdqu 16(%rdi), %xmm3
669 movdqu 16(%rsi), %xmm4
670 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
671 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
672 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
673 pmovmskb %xmm3, %edx
674 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
675 jnz L(less4_double_words_16)
676
677 add $32, %rsi
678 add $32, %rdi
679 jmp L(continue_48_48)
680
681 .p2align 4
682L(continue_0_0):
683 movdqu (%rdi), %xmm1
684 movdqu (%rsi), %xmm2
685 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
686 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
687 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
688 pmovmskb %xmm1, %edx
689 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
690 jnz L(less4_double_words)
691
692 movdqu 16(%rdi), %xmm3
693 movdqu 16(%rsi), %xmm4
694 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
695 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
696 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
697 pmovmskb %xmm3, %edx
698 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
699 jnz L(less4_double_words_16)
700
701 movdqu 32(%rdi), %xmm1
702 movdqu 32(%rsi), %xmm2
703 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
704 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
705 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
706 pmovmskb %xmm1, %edx
707 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
708 jnz L(less4_double_words_32)
709
710 add $48, %rsi
711 add $48, %rdi
712 jmp L(continue_48_48)
713
714 .p2align 4
715L(continue_0_16):
716 movdqu (%rdi), %xmm1
717 movdqu (%rsi), %xmm2
718 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
719 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
720 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
721 pmovmskb %xmm1, %edx
722 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
723 jnz L(less4_double_words)
724
725 movdqu 16(%rdi), %xmm1
726 movdqu 16(%rsi), %xmm2
727 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
728 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
729 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
730 pmovmskb %xmm1, %edx
731 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
732 jnz L(less4_double_words_16)
733
734 add $32, %rsi
735 add $32, %rdi
736 jmp L(continue_32_48)
737
738 .p2align 4
739L(continue_0_32):
740 movdqu (%rdi), %xmm1
741 movdqu (%rsi), %xmm2
742 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
743 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
744 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
745 pmovmskb %xmm1, %edx
746 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
747 jnz L(less4_double_words)
748
749 add $16, %rsi
750 add $16, %rdi
751 jmp L(continue_16_48)
752
753 .p2align 4
754L(continue_16_32):
755 movdqu (%rdi), %xmm1
756 movdqu (%rsi), %xmm2
757 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
758 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
759 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
760 pmovmskb %xmm1, %edx
761 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
762 jnz L(less4_double_words)
763
764 add $16, %rsi
765 add $16, %rdi
766 jmp L(continue_32_48)
767
768 .p2align 4
769L(less4_double_words1):
770 cmp (%rsi), %eax
771 jne L(nequal)
772 test %eax, %eax
773 jz L(equal)
774
775 mov 4(%rsi), %ecx
776 cmp %ecx, 4(%rdi)
777 jne L(nequal)
778 test %ecx, %ecx
779 jz L(equal)
780
781 mov 8(%rsi), %ecx
782 cmp %ecx, 8(%rdi)
783 jne L(nequal)
784 test %ecx, %ecx
785 jz L(equal)
786
787 mov 12(%rsi), %ecx
788 cmp %ecx, 12(%rdi)
789 jne L(nequal)
790 xor %eax, %eax
791 ret
792
793 .p2align 4
794L(less4_double_words):
795 xor %eax, %eax
796 test %dl, %dl
797 jz L(next_two_double_words)
798 and $15, %dl
799 jz L(second_double_word)
800 mov (%rdi), %eax
801 cmp (%rsi), %eax
802 jne L(nequal)
803 ret
804
805 .p2align 4
806L(second_double_word):
807 mov 4(%rdi), %eax
808 cmp 4(%rsi), %eax
809 jne L(nequal)
810 ret
811
812 .p2align 4
813L(next_two_double_words):
814 and $15, %dh
815 jz L(fourth_double_word)
816 mov 8(%rdi), %eax
817 cmp 8(%rsi), %eax
818 jne L(nequal)
819 ret
820
821 .p2align 4
822L(fourth_double_word):
823 mov 12(%rdi), %eax
824 cmp 12(%rsi), %eax
825 jne L(nequal)
826 ret
827
828 .p2align 4
829L(less4_double_words_16):
830 xor %eax, %eax
831 test %dl, %dl
832 jz L(next_two_double_words_16)
833 and $15, %dl
834 jz L(second_double_word_16)
835 mov 16(%rdi), %eax
836 cmp 16(%rsi), %eax
837 jne L(nequal)
838 ret
839
840 .p2align 4
841L(second_double_word_16):
842 mov 20(%rdi), %eax
843 cmp 20(%rsi), %eax
844 jne L(nequal)
845 ret
846
847 .p2align 4
848L(next_two_double_words_16):
849 and $15, %dh
850 jz L(fourth_double_word_16)
851 mov 24(%rdi), %eax
852 cmp 24(%rsi), %eax
853 jne L(nequal)
854 ret
855
856 .p2align 4
857L(fourth_double_word_16):
858 mov 28(%rdi), %eax
859 cmp 28(%rsi), %eax
860 jne L(nequal)
861 ret
862
863 .p2align 4
864L(less4_double_words_32):
865 xor %eax, %eax
866 test %dl, %dl
867 jz L(next_two_double_words_32)
868 and $15, %dl
869 jz L(second_double_word_32)
870 mov 32(%rdi), %eax
871 cmp 32(%rsi), %eax
872 jne L(nequal)
873 ret
874
875 .p2align 4
876L(second_double_word_32):
877 mov 36(%rdi), %eax
878 cmp 36(%rsi), %eax
879 jne L(nequal)
880 ret
881
882 .p2align 4
883L(next_two_double_words_32):
884 and $15, %dh
885 jz L(fourth_double_word_32)
886 mov 40(%rdi), %eax
887 cmp 40(%rsi), %eax
888 jne L(nequal)
889 ret
890
891 .p2align 4
892L(fourth_double_word_32):
893 mov 44(%rdi), %eax
894 cmp 44(%rsi), %eax
895 jne L(nequal)
896 ret
897
898 .p2align 4
899L(less4_double_words_48):
900 xor %eax, %eax
901 test %dl, %dl
902 jz L(next_two_double_words_48)
903 and $15, %dl
904 jz L(second_double_word_48)
905 mov 48(%rdi), %eax
906 cmp 48(%rsi), %eax
907 jne L(nequal)
908 ret
909
910 .p2align 4
911L(second_double_word_48):
912 mov 52(%rdi), %eax
913 cmp 52(%rsi), %eax
914 jne L(nequal)
915 ret
916
917 .p2align 4
918L(next_two_double_words_48):
919 and $15, %dh
920 jz L(fourth_double_word_48)
921 mov 56(%rdi), %eax
922 cmp 56(%rsi), %eax
923 jne L(nequal)
924 ret
925
926 .p2align 4
927L(fourth_double_word_48):
928 mov 60(%rdi), %eax
929 cmp 60(%rsi), %eax
930 jne L(nequal)
931 ret
932
933 .p2align 4
934L(nequal):
935 mov $1, %eax
936 jg L(nequal_bigger)
937 neg %eax
938
939L(nequal_bigger):
940 ret
941
942 .p2align 4
943L(equal):
944 xor %rax, %rax
945 ret
946
947END (__wcscmp)
948#ifndef __wcscmp
949libc_hidden_def (__wcscmp)
950weak_alias (__wcscmp, wcscmp)
951#endif
952

/* Source: glibc/sysdeps/x86_64/wcscmp.S  */