1 | /* strchr -- find the first instance of C in a nul-terminated string. |
2 | Copyright (C) 2013-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library. If not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | |
21 | .syntax unified |
22 | .text |
23 | |
/* strchr (r0 = s, r1 = c)

   Returns in r0 a pointer to the first byte of the nul-terminated
   string s that equals the low byte of c, or NULL when the terminator
   is reached first.  When the low byte of c is 0 the terminator itself
   is the match.

   Scratch registers: r1, r2, r3, ip and the condition flags.
   r4-r7 are used by the word loop and are saved and restored on the
   stack around it.  */
ENTRY (strchr)
	ldrb	r2, [r0]		@ r2 = s[0], fetched as early as possible
	uxtb	r1, r1			@ only the low byte of c takes part

	@ Phase 1: byte-at-a-time scan.
	@ Short strings should finish before any word-loop setup is paid
	@ for, while long strings should reach an 8-byte aligned pointer.
	@ The byte budget is therefore 16 minus (r0 & 7).  One of those
	@ bytes is the peeled first byte tested just below, which leaves
	@ r3 = 15 - (r0 & 7) iterations for the loop.
	and	r3, r0, #7
	rsb	r3, r3, #15
	cmp	r2, r1			@ is s[0] the wanted character?
	it	ne
	cmpne	r2, #0			@ if not, is it the terminator?
	beq	.Lstrchr_done

.Lstrchr_byte_loop:
	ldrb	r2, [r0, #1]!		@ step r0 forward, r2 = *r0
	subs	r3, r3, #1		@ Z set when the byte budget is used up
	it	ne
	cmpne	r2, r1			@ otherwise Z set when r2 matches c
	it	ne
	cmpne	r2, #0			@ otherwise Z set on the terminator
	bne	.Lstrchr_byte_loop

	@ Z does not say which of the three conditions stopped the loop,
	@ so the last byte read is tested once more.
	cmp	r2, r1
	it	ne
	cmpne	r2, #0
	beq	.Lstrchr_done
	@ Budget exhausted without a hit: move past the byte just tested,
	@ which leaves r0 on an 8-byte boundary.
	add	r0, r0, #1

	@ Phase 2: scan eight bytes per iteration.  This needs four more
	@ registers, so a stack frame is set up here.
	push	{ r4-r7 }
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (r4, 0)
	cfi_rel_offset (r5, 4)
	cfi_rel_offset (r6, 8)
	cfi_rel_offset (r7, 12)

	ldrd	r2, r3, [r0], #8	@ r2:r3 = first 8 bytes, r0 moves past them
	orr	r1, r1, r1, lsl #8	@ c copied into the low two bytes ...
	@ ip = 0x01010101, the per-byte constant used by the uqsub8 tests.
#ifdef ARCH_HAS_T2
	movw	ip, #0x0101
	movt	ip, #0x0101
#else
	ldr	ip, =0x01010101
#endif
	pld	[r0, #64]
	orr	r1, r1, r1, lsl #16	@ ... and now into all four bytes

	@ Each pass examines the two words in r2 and r3.
	@ uqsub8 computes, per byte, the unsigned saturating value 1 - byte,
	@ which is 1 for a zero byte and 0 for anything else.  Bit 0 of each
	@ result byte is thus a "found" flag.  XORing the data with the
	@ replicated c first turns bytes equal to c into zero, so the same
	@ test detects c as well.
.Lstrchr_word_loop:
	uqsub8	r4, ip, r2		@ terminator flags, first word
	eor	r6, r2, r1		@ bytes equal to c become 0
	uqsub8	r5, ip, r3		@ terminator flags, second word
	eor	r7, r3, r1
	uqsub8	r6, ip, r6		@ c flags, first word
	pld	[r0, #128]		@ prefetch further ahead of the scan
	uqsub8	r7, ip, r7		@ c flags, second word
	orr	r4, r4, r6		@ r4 = any hit in the first word
	orr	r5, r5, r7		@ r5 = any hit in the second word
	orrs	r6, r4, r5		@ Z set when neither word had a hit
	it	eq
	ldrdeq	r2, r3, [r0], #8	@ nothing found: fetch the next 8 bytes
	beq	.Lstrchr_word_loop

	@ A hit exists in r2 or r3.  Arrange for
	@   r0 = address of the word holding the first hit,
	@   r2 = contents of that word,
	@   r4 = found flags of that word.
	@ r0 currently points just past the doubleword.
	sub	r0, r0, #8		@ assume the first word
	cmp	r4, #0			@ did the first word have a hit?
	ittt	eq
	moveq	r4, r5			@ no: take the second word flags,
	moveq	r2, r3			@ the second word data,
	addeq	r0, r0, #4		@ and the second word address

	@ Turn the found flags into a bit offset that is a multiple of 8.
#if defined(__ARMEL__)
	@ Little endian: reverse the flags so that the byte at the lowest
	@ address is the one clz reaches first.
	rev	r4, r4
#else
	@ Big endian: the flags are already in clz order.  Reverse the
	@ data instead so a right shift brings the hit to the low byte.
	rev	r2, r2
#endif
	@ The flag is bit 0 of its byte rather than bit 7, hence clz is
	@ 7 larger than the bit offset of the byte.
	clz	r3, r4
	sub	r3, r3, #7
	add	r0, r0, r3, lsr #3	@ r0 = address of the byte that hit
	lsr	r2, r2, r3		@ bring that byte down to bits 0-7
	uxtb	r2, r2			@ r2 = the byte that hit
	uxtb	r1, r1			@ r1 = c again, replication removed

	pop	{ r4-r7 }
	cfi_adjust_cfa_offset (-16)
	cfi_restore (r4)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)

	@ Common exit: r0 points at the byte in r2, which is either c or
	@ the terminator.  Return NULL unless it is c.
.Lstrchr_done:
	cmp	r2, r1
	it	ne
	movne	r0, #0
	bx	lr

END (strchr)

/* Symbol exports for the routine above.  */
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)
144 | |