1/* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR
2 less than LEN. For Intel 80x86, x>=3.
3 Copyright (C) 1994-2022 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21#include "asm-syntax.h"
22
23#define PARMS 4+8 /* space for 2 saved regs */
24#define RTN PARMS
25#define STR RTN
26#define CHR STR+4
27#define LEN CHR+4
28
29 .text
30ENTRY (__memchr)
31
32 /* Save callee-safe registers used in this function. */
33 pushl %esi
34 cfi_adjust_cfa_offset (4)
35 pushl %edi
36 cfi_adjust_cfa_offset (4)
37 cfi_rel_offset (edi, 0)
38
39 /* Load parameters into registers. */
40 movl STR(%esp), %eax /* str: pointer to memory block. */
41 movl CHR(%esp), %edx /* c: byte we are looking for. */
42 movl LEN(%esp), %esi /* len: length of memory block. */
43 cfi_rel_offset (esi, 4)
44
45 /* If my must not test more than three characters test
46 them one by one. This is especially true for 0. */
47 cmpl $4, %esi
48 jb L(3)
49
50 /* At the moment %edx contains CHR. What we need for the
51 algorithm is CHR in all bytes of the dword. Avoid
52 operations on 16 bit words because these require an
53 prefix byte (and one more cycle). */
54 movb %dl, %dh /* Now it is 0|0|c|c */
55 movl %edx, %ecx
56 shll $16, %edx /* Now c|c|0|0 */
57 movw %cx, %dx /* And finally c|c|c|c */
58
59 /* Better performance can be achieved if the word (32
60 bit) memory access is aligned on a four-byte-boundary.
61 So process first bytes one by one until boundary is
62 reached. Don't use a loop for better performance. */
63
64 testb $3, %al /* correctly aligned ? */
65 je L(2) /* yes => begin loop */
66 cmpb %dl, (%eax) /* compare byte */
67 je L(9) /* target found => return */
68 incl %eax /* increment source pointer */
69 decl %esi /* decrement length counter */
70 je L(4) /* len==0 => return NULL */
71
72 testb $3, %al /* correctly aligned ? */
73 je L(2) /* yes => begin loop */
74 cmpb %dl, (%eax) /* compare byte */
75 je L(9) /* target found => return */
76 incl %eax /* increment source pointer */
77 decl %esi /* decrement length counter */
78 je L(4) /* len==0 => return NULL */
79
80 testb $3, %al /* correctly aligned ? */
81 je L(2) /* yes => begin loop */
82 cmpb %dl, (%eax) /* compare byte */
83 je L(9) /* target found => return */
84 incl %eax /* increment source pointer */
85 decl %esi /* decrement length counter */
86 /* no test for len==0 here, because this is done in the
87 loop head */
88 jmp L(2)
89
90 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
91 change any of the hole bits of LONGWORD.
92
93 1) Is this safe? Will it catch all the zero bytes?
94 Suppose there is a byte with all zeros. Any carry bits
95 propagating from its left will fall into the hole at its
96 least significant bit and stop. Since there will be no
97 carry from its most significant bit, the LSB of the
98 byte to the left will be unchanged, and the zero will be
99 detected.
100
101 2) Is this worthwhile? Will it ignore everything except
102 zero bytes? Suppose every byte of LONGWORD has a bit set
103 somewhere. There will be a carry into bit 8. If bit 8
104 is set, this will carry into bit 16. If bit 8 is clear,
105 one of bits 9-15 must be set, so there will be a carry
106 into bit 16. Similarly, there will be a carry into bit
107 24. If one of bits 24-31 is set, there will be a carry
108 into bit 32 (=carry flag), so all of the hole bits will
109 be changed.
110
111 3) But wait! Aren't we looking for CHR, not zero?
112 Good point. So what we do is XOR LONGWORD with a longword,
113 each of whose bytes is CHR. This turns each byte that is CHR
114 into a zero. */
115
116
117 /* Each round the main loop processes 16 bytes. */
118
119 ALIGN (4)
120
121L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
122 movl $0xfefefeff, %edi /* magic value */
123 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
124 are now 0 */
125 addl %ecx, %edi /* add the magic value to the word. We get
126 carry bits reported for each byte which
127 is *not* 0 */
128
129 /* According to the algorithm we had to reverse the effect of the
130 XOR first and then test the overflow bits. But because the
131 following XOR would destroy the carry flag and it would (in a
132 representation with more than 32 bits) not alter then last
133 overflow, we can now test this condition. If no carry is signaled
134 no overflow must have occurred in the last byte => it was 0. */
135 jnc L(8)
136
137 /* We are only interested in carry bits that change due to the
138 previous add, so remove original bits */
139 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
140
141 /* Now test for the other three overflow bits. */
142 orl $0xfefefeff, %edi /* set all non-carry bits */
143 incl %edi /* add 1: if one carry bit was *not* set
144 the addition will not result in 0. */
145
146 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
147 jnz L(8) /* found it => return pointer */
148
149 /* This process is unfolded four times for better performance.
150 we don't increment the source pointer each time. Instead we
151 use offsets and increment by 16 in each run of the loop. But
152 before probing for the matching byte we need some extra code
153 (following LL(13) below). Even the len can be compared with
154 constants instead of decrementing each time. */
155
156 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
157 movl $0xfefefeff, %edi /* magic value */
158 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
159 are now 0 */
160 addl %ecx, %edi /* add the magic value to the word. We get
161 carry bits reported for each byte which
162 is *not* 0 */
163 jnc L(7) /* highest byte is CHR => return pointer */
164 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
165 orl $0xfefefeff, %edi /* set all non-carry bits */
166 incl %edi /* add 1: if one carry bit was *not* set
167 the addition will not result in 0. */
168 jnz L(7) /* found it => return pointer */
169
170 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
171 movl $0xfefefeff, %edi /* magic value */
172 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
173 are now 0 */
174 addl %ecx, %edi /* add the magic value to the word. We get
175 carry bits reported for each byte which
176 is *not* 0 */
177 jnc L(6) /* highest byte is CHR => return pointer */
178 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
179 orl $0xfefefeff, %edi /* set all non-carry bits */
180 incl %edi /* add 1: if one carry bit was *not* set
181 the addition will not result in 0. */
182 jnz L(6) /* found it => return pointer */
183
184 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
185 movl $0xfefefeff, %edi /* magic value */
186 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
187 are now 0 */
188 addl %ecx, %edi /* add the magic value to the word. We get
189 carry bits reported for each byte which
190 is *not* 0 */
191 jnc L(5) /* highest byte is CHR => return pointer */
192 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
193 orl $0xfefefeff, %edi /* set all non-carry bits */
194 incl %edi /* add 1: if one carry bit was *not* set
195 the addition will not result in 0. */
196 jnz L(5) /* found it => return pointer */
197
198 /* Adjust both counters for a full round, i.e. 16 bytes. */
199 addl $16, %eax
200L(2): subl $16, %esi
201 jae L(1) /* Still more than 16 bytes remaining */
202
203 /* Process remaining bytes separately. */
204 cmpl $4-16, %esi /* rest < 4 bytes? */
205 jb L(3) /* yes, than test byte by byte */
206
207 movl (%eax), %ecx /* get word (= 4 bytes) in question */
208 movl $0xfefefeff, %edi /* magic value */
209 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
210 are now 0 */
211 addl %ecx, %edi /* add the magic value to the word. We get
212 carry bits reported for each byte which
213 is *not* 0 */
214 jnc L(8) /* highest byte is CHR => return pointer */
215 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
216 orl $0xfefefeff, %edi /* set all non-carry bits */
217 incl %edi /* add 1: if one carry bit was *not* set
218 the addition will not result in 0. */
219 jne L(8) /* found it => return pointer */
220 addl $4, %eax /* adjust source pointer */
221
222 cmpl $8-16, %esi /* rest < 8 bytes? */
223 jb L(3) /* yes, than test byte by byte */
224
225 movl (%eax), %ecx /* get word (= 4 bytes) in question */
226 movl $0xfefefeff, %edi /* magic value */
227 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
228 are now 0 */
229 addl %ecx, %edi /* add the magic value to the word. We get
230 carry bits reported for each byte which
231 is *not* 0 */
232 jnc L(8) /* highest byte is CHR => return pointer */
233 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
234 orl $0xfefefeff, %edi /* set all non-carry bits */
235 incl %edi /* add 1: if one carry bit was *not* set
236 the addition will not result in 0. */
237 jne L(8) /* found it => return pointer */
238 addl $4, %eax /* adjust source pointer */
239
240 cmpl $12-16, %esi /* rest < 12 bytes? */
241 jb L(3) /* yes, than test byte by byte */
242
243 movl (%eax), %ecx /* get word (= 4 bytes) in question */
244 movl $0xfefefeff, %edi /* magic value */
245 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
246 are now 0 */
247 addl %ecx, %edi /* add the magic value to the word. We get
248 carry bits reported for each byte which
249 is *not* 0 */
250 jnc L(8) /* highest byte is CHR => return pointer */
251 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
252 orl $0xfefefeff, %edi /* set all non-carry bits */
253 incl %edi /* add 1: if one carry bit was *not* set
254 the addition will not result in 0. */
255 jne L(8) /* found it => return pointer */
256 addl $4, %eax /* adjust source pointer */
257
258 /* Check the remaining bytes one by one. */
259L(3): andl $3, %esi /* mask out uninteresting bytes */
260 jz L(4) /* no remaining bytes => return NULL */
261
262 cmpb %dl, (%eax) /* compare byte with CHR */
263 je L(9) /* equal, than return pointer */
264 incl %eax /* increment source pointer */
265 decl %esi /* decrement length */
266 jz L(4) /* no remaining bytes => return NULL */
267
268 cmpb %dl, (%eax) /* compare byte with CHR */
269 je L(9) /* equal, than return pointer */
270 incl %eax /* increment source pointer */
271 decl %esi /* decrement length */
272 jz L(4) /* no remaining bytes => return NULL */
273
274 cmpb %dl, (%eax) /* compare byte with CHR */
275 je L(9) /* equal, than return pointer */
276
277L(4): /* no byte found => return NULL */
278 xorl %eax, %eax
279 jmp L(9)
280
281 /* add missing source pointer increments */
282L(5): addl $4, %eax
283L(6): addl $4, %eax
284L(7): addl $4, %eax
285
286 /* Test for the matching byte in the word. %ecx contains a NUL
287 char in the byte which originally was the byte we are looking
288 at. */
289L(8): testb %cl, %cl /* test first byte in dword */
290 jz L(9) /* if zero => return pointer */
291 incl %eax /* increment source pointer */
292
293 testb %ch, %ch /* test second byte in dword */
294 jz L(9) /* if zero => return pointer */
295 incl %eax /* increment source pointer */
296
297 testl $0xff0000, %ecx /* test third byte in dword */
298 jz L(9) /* if zero => return pointer */
299 incl %eax /* increment source pointer */
300
301 /* No further test needed we we know it is one of the four bytes. */
302L(9): popl %edi /* pop saved registers */
303 cfi_adjust_cfa_offset (-4)
304 cfi_restore (edi)
305 popl %esi
306 cfi_adjust_cfa_offset (-4)
307 cfi_restore (esi)
308
309 ret
310END (__memchr)
311
312weak_alias (__memchr, memchr)
313libc_hidden_builtin_def (memchr)
314

source code of glibc/sysdeps/i386/memchr.S