/* strchr SSE2 without bsf
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

/* Unwind (CFI) bookkeeping that accompanies a 4-byte push: the CFA is
   now 4 bytes further from %esp, and REG is recorded as saved at
   offset 0 from the CFA register (the new top of stack).  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Inverse of CFI_PUSH: the CFA moves 4 bytes back and REG is marked
   as holding its entry value again.  */
# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop a 32-bit register together with its CFI annotation.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Offset from %esp to the first stack argument once ENTRANCE has run:
   4 bytes of return address plus the 4 bytes pushed for %edi.  */
# define PARMS	8
/* Prologue: save %edi, which the function body overwrites.  */
# define ENTRANCE	PUSH(%edi)
/* Epilogue: restore %edi and return.  The trailing CFI_PUSH puts the
   unwind state back to "%edi saved on the stack" for whatever code is
   assembled after this return site.  */
# define RETURN	POP(%edi); ret; CFI_PUSH(%edi);

/* Stack offsets of the two arguments (string pointer, character).  */
# define STR1	PARMS
# define STR2	STR1+4

/* __strchr_sse2: strchr for i386 using SSE2, decoding the compare
   masks with explicit bit tests instead of bsf.

   In:   STR1(%esp) = string pointer
	 STR2(%esp) = character to look for (only its low byte is used)
   Out:  %eax = address of the first byte equal to the character that
	 is not preceded by the terminating NUL (the NUL itself counts
	 as a match when the character is 0), or 0 if there is none.
   Clobbers: %ecx, %edx, %xmm0, %xmm1, %xmm2, flags.  %edi is saved by
	 ENTRANCE and restored by every RETURN.

   Register roles in the body:
	 %edi  = address corresponding to bit 0 of the masks
	 %xmm1 = the character replicated into all 16 bytes
	 %xmm2 = all-zero before each NUL compare
	 %eax  = match mask, %edx = NUL mask; bit i <-> byte i(%edi)

   The string is read in 16-byte aligned chunks with movdqa.  An
   aligned 16-byte load cannot straddle a page boundary, so bytes read
   beyond the terminator within a chunk cannot fault.  */
	atom_text_section
ENTRY (__strchr_sse2)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Replicate the low byte of the character: after the two
	   unpacks the low dword is four copies of it, and the pshufd
	   below broadcasts that dword to the whole register.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET.  */
	and	$15, %ecx
	/* pshufd does not modify EFLAGS, so the je below still tests
	   the result of the AND (OFFSET == 0, i.e. aligned string).  */
	pshufd	$0, %xmm1, %xmm1
	je	L(loop)

/* Handle unaligned string.  */
	and	$-16, %edi
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NUL is.  */
	pmovmskb %xmm2, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes (those before the string start).
	   pmovmskb leaves bits 16-31 clear, so the arithmetic shift
	   acts as a logical one here.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	test	%eax, %eax
	jz	L(unaligned_no_match)
	/* There is a match.  Make EDI the string start again so that
	   bit 0 of the shifted masks corresponds to (%edi).  */
	add	%ecx, %edi
	/* Is there a NUL?  */
	test	%edx, %edx
	jz	L(match_case1)
	jmp	L(match_case2)

	.p2align 4
L(unaligned_no_match):
	test	%edx, %edx
	jne	L(return_null)

	/* XMM2 may still flag zero bytes located before the string
	   start (their mask bits were shifted out above), so clear it
	   before entering the loop.  */
	pxor	%xmm2, %xmm2
	add	$16, %edi

	.p2align 4
/* Loop start on aligned string, unrolled four times.  XMM2 is zero at
   the top of every step: a step that falls through found no NUL, so
   its pcmpeqb left XMM2 all-zero again.  */
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)
	add	$16, %edi

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)
	add	$16, %edi

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)
	add	$16, %edi

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)
	add	$16, %edi
	jmp	L(loop)

L(matches):
	/* There is a match.  First find where NUL is.  */
	test	%edx, %edx
	jz	L(match_case1)

	.p2align 4
/* Case 2: the chunk holds both a match (EAX != 0) and a NUL
   (EDX != 0).  Walk the two masks from bit 0 upwards, testing the
   match bit before the NUL bit at each position: whichever is met
   first decides between returning the match and returning 0.  */
L(match_case2):
	test	%al, %al
	jz	L(match_high_case2)

	/* Match somewhere in bytes 0-7.  ECX is free to use as scratch
	   from here on.  */
	mov	%al, %cl
	and	$15, %cl
	jnz	L(match_case2_4)

	/* First match is in bytes 4-7; a NUL in bytes 0-3 precedes it.  */
	mov	%dl, %ch
	and	$15, %ch
	jnz	L(return_null)

	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x10, %dl
	jnz	L(return_null)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x20, %dl
	jnz	L(return_null)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x40, %dl
	jnz	L(return_null)
	/* Only bit 7 is left, so the match is byte 7.  */
	lea	7(%edi), %eax
	RETURN

	.p2align 4
/* Case 2, a match exists in bytes 0-3.  */
L(match_case2_4):
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x01, %dl
	jnz	L(return_null)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x02, %dl
	jnz	L(return_null)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x04, %dl
	jnz	L(return_null)
	/* Only bit 3 is left, so the match is byte 3.  */
	lea	3(%edi), %eax
	RETURN

	.p2align 4
/* Case 2, no match in bytes 0-7: the first match is in bytes 8-15.  */
L(match_high_case2):
	/* Any NUL in bytes 0-7 precedes the match.  */
	test	%dl, %dl
	jnz	L(return_null)

	mov	%ah, %cl
	and	$15, %cl
	jnz	L(match_case2_12)

	/* First match is in bytes 12-15; a NUL in bytes 8-11 precedes
	   it.  */
	mov	%dh, %ch
	and	$15, %ch
	jnz	L(return_null)

	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x10, %dh
	jnz	L(return_null)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x20, %dh
	jnz	L(return_null)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x40, %dh
	jnz	L(return_null)
	/* Only bit 15 is left, so the match is byte 15.  */
	lea	15(%edi), %eax
	RETURN

	.p2align 4
/* Case 2, a match exists in bytes 8-11.  */
L(match_case2_12):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x01, %dh
	jnz	L(return_null)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x02, %dh
	jnz	L(return_null)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x04, %dh
	jnz	L(return_null)
	/* Only bit 11 is left, so the match is byte 11.  */
	lea	11(%edi), %eax
	RETURN

	.p2align 4
/* Case 1: there is a match and no NUL in the chunk, so the lowest set
   bit of EAX is the answer.  */
L(match_case1):
	test	%al, %al
	jz	L(match_high_case1)

	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	/* AL is non-zero and bits 0-6 are clear: byte 7.  */
	lea	7(%edi), %eax
	RETURN

	.p2align 4
/* Case 1, first match in bytes 8-15.  */
L(match_high_case1):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	/* Bits 8-14 are clear: byte 15.  */
	lea	15(%edi), %eax
	RETURN

/* L(ExitN) returns the address of the N-th byte of the chunk, i.e.
   N-1(%edi).  There is no Exit8 or Exit16: bytes 7 and 15 are handled
   by the fall-through paths above.  */
	.p2align 4
L(Exit1):
	mov	%edi, %eax
	RETURN

	.p2align 4
L(Exit2):
	lea	1(%edi), %eax
	RETURN

	.p2align 4
L(Exit3):
	lea	2(%edi), %eax
	RETURN

	.p2align 4
L(Exit4):
	lea	3(%edi), %eax
	RETURN

	.p2align 4
L(Exit5):
	lea	4(%edi), %eax
	RETURN

	.p2align 4
L(Exit6):
	lea	5(%edi), %eax
	RETURN

	.p2align 4
L(Exit7):
	lea	6(%edi), %eax
	RETURN

	.p2align 4
L(Exit9):
	lea	8(%edi), %eax
	RETURN

	.p2align 4
L(Exit10):
	lea	9(%edi), %eax
	RETURN

	.p2align 4
L(Exit11):
	lea	10(%edi), %eax
	RETURN

	.p2align 4
L(Exit12):
	lea	11(%edi), %eax
	RETURN

	.p2align 4
L(Exit13):
	lea	12(%edi), %eax
	RETURN

	.p2align 4
L(Exit14):
	lea	13(%edi), %eax
	RETURN

	.p2align 4
L(Exit15):
	lea	14(%edi), %eax
	RETURN

/* Return NULL: the terminator was reached before any match.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

END (__strchr_sse2)
#endif

/* Source: glibc/sysdeps/i386/i686/multiarch/strchr-sse2.S  */