/* strchr with SSE2 with bsf
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

/* Unwind-info bookkeeping for a 4-byte push of REG: the CFA offset
   grows by 4 and REG is recorded as saved at offset 0.  The cfi_*
   helpers are not defined in this file; they are expected to come
   from <sysdep.h>.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Inverse of CFI_PUSH: the CFA offset shrinks by 4 and REG is marked
   as restored.  */
# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop REG and keep the unwind info in step with the stack.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first argument from %esp once ENTRANCE has run:
   4 bytes of return address plus the 4-byte saved %edi.  */
# define PARMS		8
# define ENTRANCE	PUSH(%edi)
/* Restore %edi and return.  The CFI_PUSH after the ret puts the
   unwind state back to "%edi is on the stack" for the code that
   follows a RETURN in the file; that code is only reached by branches
   taken while %edi is still pushed.  */
# define RETURN		POP(%edi); ret; CFI_PUSH(%edi);

/* Stack offsets of the two arguments (string pointer, character).  */
# define STR1		PARMS
# define STR2		STR1+4

/* __strchr_sse2_bsf -- strchr for i386 using SSE2 compares and bsf.

   Arguments are read from the stack: the string pointer at
   STR1(%esp) and the character at STR2(%esp); both offsets are valid
   once ENTRANCE has pushed %edi.  The result is left in %eax: the
   address of the first byte equal to the low byte of the character,
   or 0 if the NUL terminator is reached first.  When the low byte of
   the character is 0 the address of the terminator is returned.

   Register use:
     %edi   16-byte-aligned address used for the chunk loads
	    (saved by ENTRANCE, restored by RETURN)
     %ecx   string pointer, then its offset inside the first chunk;
	    reused in L(matches) for the index of the first NUL
     %eax   bitmask of bytes equal to the character, then the index
	    of the first such byte, then the return value
     %edx   bitmask of NUL bytes, or the index of the first NUL
     %xmm0  chunk data, then the per-byte match mask
     %xmm1  the character replicated into all 16 bytes
     %xmm2  all zero before each compare, per-byte NUL mask after  */

	.text
ENTRY (__strchr_sse2_bsf)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Replicate the low byte of the character: the two byte
	   interleaves fill the low dword with it, and the pshufd below
	   copies that dword into all four dword lanes.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET.  */
	and	$15, %ecx
	/* pshufd does not modify EFLAGS, so the je still tests the result
	   of the and: OFFSET == 0 means the string is already aligned.  */
	pshufd	$0, %xmm1, %xmm1
	je	L(loop)

/* Handle unaligned string.  */
	and	$-16, %edi
	/* Aligned load of the 16-byte block that contains the start of
	   the string; its first OFFSET bytes precede the string.  */
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  After the shifts, bit 0 of each mask
	   corresponds to the first byte of the string.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	test	%eax, %eax
	je	L(unaligned_no_match)
	/* Check which byte is a match.  */
	bsf	%eax, %eax
	/* Is there a NULL? */
	test	%edx, %edx
	je	L(unaligned_match)
	bsf	%edx, %edx
	cmpl	%edx, %eax
	/* Return NULL if NULL comes first.  Equal indices (the searched
	   byte is itself NUL) fall through and return the match.  */
	ja	L(return_null)
L(unaligned_match):
	/* Result = aligned base + OFFSET + index of the match.  */
	add	%edi, %eax
	add	%ecx, %eax
	RETURN

	.p2align 4
L(unaligned_no_match):
	/* No match in the first block: a NUL there ends the search.  */
	test	%edx, %edx
	jne	L(return_null)
	/* XMM2 may still flag NUL bytes that lie before the string; clear
	   it so the loop starts with an all-zero register.  */
	pxor	%xmm2, %xmm2

	add	$16, %edi

	.p2align 4
/* Loop start on aligned string.  Four identical steps per iteration.
   XMM2 is all zero at the start of every step (a step that finds a
   NUL leaves the loop), so comparing the chunk against it marks the
   NUL bytes.  EDI is advanced before the branch, so at L(matches) it
   points 16 bytes past the chunk that was just examined.  */
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	/* Nonzero if the chunk holds a match or a NUL.  */
	or	%eax, %edx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	jmp	L(loop)

L(matches):
	/* The or in the loop overwrote EDX; reload the NUL mask.  */
	pmovmskb %xmm2, %edx
	/* No match bit set means only a NUL was found.  */
	test	%eax, %eax
	jz	L(return_null)
	bsf	%eax, %eax
	/* There is a match.  First find where NULL is.  */
	test	%edx, %edx
	je	L(match)
	bsf	%edx, %ecx
	/* Check if NULL comes first.  */
	cmpl	%ecx, %eax
	ja	L(return_null)
L(match):
	/* Undo the loop's early advance, then add the match index.  */
	sub	$16, %edi
	add	%edi, %eax
	RETURN

/* Return NULL.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

END (__strchr_sse2_bsf)
#endif

/* Source: glibc/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S  */