1/* strlen -- Compute length of NUL terminated string.
2 Highly optimized version for ix86, x>=5.
3 Copyright (C) 1995-2024 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21#include "asm-syntax.h"
22
23/* This version is especially optimized for the i586 (and following?)
24 processors. This is mainly done by using the two pipelines. The
25 version optimized for i486 is weak in this aspect because to get
26 as much parallelism we have to execute some *more* instructions.
27
28 The code below is structured to reflect the pairing of the instructions
29 as *I think* it is. I have no processor data book to verify this.
30 If you find something you think is incorrect let me know. */
31
32
33/* The magic value which is used throughout in the whole code. */
34#define magic 0xfefefeff
35
36#define PARMS 4 /* no space for saved regs */
37#define STR PARMS
38
39 .text
40ENTRY (strlen)
41
42 movl STR(%esp), %eax
43 movl $3, %edx /* load mask (= 3) */
44
45 andl %eax, %edx /* separate last two bits of address */
46
47 jz L(1) /* aligned => start loop */
48 jp L(0) /* exactly two bits set */
49
50 cmpb %dh, (%eax) /* is byte NUL? */
51 je L(2) /* yes => return */
52
53 incl %eax /* increment pointer */
54 cmpb %dh, (%eax) /* is byte NUL? */
55
56 je L(2) /* yes => return */
57
58 incl %eax /* increment pointer */
59 xorl $2, %edx
60
61 jz L(1)
62
63L(0): cmpb %dh, (%eax) /* is byte NUL? */
64 je L(2) /* yes => return */
65
66 incl %eax /* increment pointer */
67 xorl %edx, %edx /* We need %edx == 0 for later */
68
69 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
70 change any of the hole bits of LONGWORD.
71
72 1) Is this safe? Will it catch all the zero bytes?
73 Suppose there is a byte with all zeros. Any carry bits
74 propagating from its left will fall into the hole at its
75 least significant bit and stop. Since there will be no
76 carry from its most significant bit, the LSB of the
77 byte to the left will be unchanged, and the zero will be
78 detected.
79
80 2) Is this worthwhile? Will it ignore everything except
81 zero bytes? Suppose every byte of LONGWORD has a bit set
82 somewhere. There will be a carry into bit 8. If bit 8
83 is set, this will carry into bit 16. If bit 8 is clear,
84 one of bits 9-15 must be set, so there will be a carry
85 into bit 16. Similarly, there will be a carry into bit
86 24. If one of bits 24-31 is set, there will be a carry
87 into bit 32 (=carry flag), so all of the hole bits will
88 be changed.
89
90 Note: %edx == 0 in any case here. */
91
92L(1):
93 movl (%eax), %ecx /* get word (= 4 bytes) in question */
94 addl $4, %eax /* adjust pointer for *next* word */
95
96 subl %ecx, %edx /* first step to negate word */
97 addl $magic, %ecx /* add magic word */
98
99 decl %edx /* complete negation of word */
100 jnc L(3) /* previous addl caused overflow? */
101
102 xorl %ecx, %edx /* (word+magic)^word */
103
104 andl $~magic, %edx /* any of the carry flags set? */
105
106 jne L(3) /* yes => determine byte */
107
108
109 movl (%eax), %ecx /* get word (= 4 bytes) in question */
110 addl $4, %eax /* adjust pointer for *next* word */
111
112 subl %ecx, %edx /* first step to negate word */
113 addl $magic, %ecx /* add magic word */
114
115 decl %edx /* complete negation of word */
116 jnc L(3) /* previous addl caused overflow? */
117
118 xorl %ecx, %edx /* (word+magic)^word */
119
120 andl $~magic, %edx /* any of the carry flags set? */
121
122 jne L(3) /* yes => determine byte */
123
124
125 movl (%eax), %ecx /* get word (= 4 bytes) in question */
126 addl $4, %eax /* adjust pointer for *next* word */
127
128 subl %ecx, %edx /* first step to negate word */
129 addl $magic, %ecx /* add magic word */
130
131 decl %edx /* complete negation of word */
132 jnc L(3) /* previous addl caused overflow? */
133
134 xorl %ecx, %edx /* (word+magic)^word */
135
136 andl $~magic, %edx /* any of the carry flags set? */
137
138 jne L(3) /* yes => determine byte */
139
140
141 movl (%eax), %ecx /* get word (= 4 bytes) in question */
142 addl $4, %eax /* adjust pointer for *next* word */
143
144 subl %ecx, %edx /* first step to negate word */
145 addl $magic, %ecx /* add magic word */
146
147 decl %edx /* complete negation of word */
148 jnc L(3) /* previous addl caused overflow? */
149
150 xorl %ecx, %edx /* (word+magic)^word */
151
152 andl $~magic, %edx /* any of the carry flags set? */
153
154 je L(1) /* no => start loop again */
155
156
157L(3): subl $4, %eax /* correct too early pointer increment */
158 subl $magic, %ecx
159
160 cmpb $0, %cl /* lowest byte NUL? */
161 jz L(2) /* yes => return */
162
163 inc %eax /* increment pointer */
164 testb %ch, %ch /* second byte NUL? */
165
166 jz L(2) /* yes => return */
167
168 shrl $16, %ecx /* make upper bytes accessible */
169 incl %eax /* increment pointer */
170
171 cmpb $0, %cl /* is third byte NUL? */
172 jz L(2) /* yes => return */
173
174 incl %eax /* increment pointer */
175
176L(2): subl STR(%esp), %eax /* now compute the length as difference
177 between start and terminating NUL
178 character */
179 ret
180END (strlen)
181libc_hidden_builtin_def (strlen)
182

source code of glibc/sysdeps/i386/i586/strlen.S