/* wcslen with SSE2
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>. */

#if IS_IN (libc)
# include <sysdep.h>

/* Offset from %esp, at function entry, of the string pointer argument.  */
# define STR	4

	.text

/* __wcslen_sse2 -- count the 32-bit elements that precede the first
   zero element of an array.

   Syntax: AT&T (GNU as), 32-bit x86, SSE2 instructions.

   In:       STR(%esp) = pointer to the array.  The vector part of the
	     code compares dword lanes of 16-byte-aligned chunks, so the
	     pointer is assumed to be 4-byte aligned (otherwise lanes
	     would not coincide with elements).
   Out:      %eax = index of the first zero element.
   Clobbers: %ecx, %edx, %xmm0-%xmm3, %xmm6, EFLAGS.  The short-string
	     exits (first eight elements) touch only %eax, %edx and
	     EFLAGS.
   Stack:    nothing is pushed; only the argument slot is read.

   Outline:
     1. Test elements 0..7 one at a time with scalar compares.
     2. Probe four 16-byte-aligned chunks starting at (s + 32) & -16.
     3. Round down to a 64-byte boundary and scan 64 bytes per
	iteration, merging four chunks with PMINUB before one compare.
     4. L(exit) turns (chunk address, PMOVMSKB mask) into the index.  */
ENTRY (__wcslen_sse2)
	mov	STR(%esp), %edx

	/* Elements 0..7: return the constant index of the first zero.  */
	cmpl	$0, (%edx)
	jz	L(exit_tail0)
	cmpl	$0, 4(%edx)
	jz	L(exit_tail1)
	cmpl	$0, 8(%edx)
	jz	L(exit_tail2)
	cmpl	$0, 12(%edx)
	jz	L(exit_tail3)
	cmpl	$0, 16(%edx)
	jz	L(exit_tail4)
	cmpl	$0, 20(%edx)
	jz	L(exit_tail5)
	cmpl	$0, 24(%edx)
	jz	L(exit_tail6)
	cmpl	$0, 28(%edx)
	jz	L(exit_tail7)

	pxor	%xmm0, %xmm0

	/* %eax = first aligned chunk.  It lies in (s + 16, s + 32], so it
	   may overlap elements already found to be non-zero above.
	   %ecx = s + 16.  Every path into L(exit) arrives with %eax 16
	   bytes past the matching chunk, so %eax - %ecx is that chunk's
	   byte offset from s.  */
	lea	32(%edx), %eax
	lea	16(%edx), %ecx
	and	$-16, %eax

	/* Probe 1.  PCMPEQD against a zero register sets each dword lane
	   that equals zero to all-ones; PMOVMSKB packs the byte sign bits
	   so every lane owns four bits of the mask in %edx.  The LEA
	   between TEST and JNZ leaves the flags untouched.  */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Probe 2.  */
	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Probe 3.  */
	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Probe 4.  When it finds nothing the compare result left in
	   %xmm3 is all-zero; the loop below relies on %xmm3 staying zero
	   and never clears it again.  */
	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Round down to a 64-byte boundary.  This steps back by at most
	   48 bytes, i.e. only over chunks that were just probed.  */
	and	$-0x40, %eax

	.p2align 4
L(aligned_64_loop):
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6

	/* Byte-wise unsigned minimum of all four chunks ends up in %xmm2.
	   A zero element in any chunk yields a zero dword lane there.  */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	64(%eax), %eax
	jz	L(aligned_64_loop)

	/* The merged test fired.  Because the minimum is taken per byte,
	   a zero lane can be assembled from bytes of different chunks, so
	   each chunk is re-tested on its own.  %eax is already 64 past the
	   block; %ecx is biased by +48, +32, +16, +0 in turn so that
	   L(exit) sees the right chunk offset.  Each failed compare leaves
	   %xmm3 all-zero again.  */

	/* Chunk 0: %xmm0 was overwritten by PMINUB, compare from memory.  */
	pcmpeqd	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	48(%ecx), %ecx
	jnz	L(exit)

	/* Chunk 1: still intact in %xmm1.  */
	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	/* Chunk 2: %xmm2 was overwritten by PMINUB, compare from memory.  */
	pcmpeqd	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	/* Chunk 3: still intact in %xmm6.  */
	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	/* False positive.  %ecx is back at s + 16 (+48 - 16 - 16 - 16) and
	   %xmm3 is zero, so the loop invariants hold; keep scanning.  */
	jmp	L(aligned_64_loop)

	/* In:  %eax - %ecx = byte offset from s of the matching chunk,
		%edx = its 16-bit PMOVMSKB mask (non-zero).
	   %dl covers lanes 0 and 1, %dh lanes 2 and 3; within each byte
	   the low nibble belongs to the lower lane.  %ecx is dead after
	   the subtraction and is reused as scratch.  */
	.p2align 4
L(exit):
	sub	%ecx, %eax
	shr	$2, %eax		/* bytes -> elements */
	test	%dl, %dl
	jz	L(exit_high)

	mov	%dl, %cl
	and	$15, %cl
	jz	L(exit_1)
	ret				/* lane 0 */

	.p2align 4
L(exit_high):
	mov	%dh, %ch
	and	$15, %ch
	jz	L(exit_3)
	add	$2, %eax		/* lane 2 */
	ret

	.p2align 4
L(exit_1):
	add	$1, %eax		/* lane 1 */
	ret

	.p2align 4
L(exit_3):
	add	$3, %eax		/* lane 3 */
	ret

	/* Short-string exits: the zero was found by the scalar compares,
	   so the result is the constant element index.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	ret

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	ret

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	ret

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	ret

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	ret

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	ret

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	ret

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	ret

END (__wcslen_sse2)
#endif

/* Source: glibc/sysdeps/i386/i686/multiarch/wcslen-sse2.S */