1 | /* Copyright (C) 2011-2024 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library. If not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #include <sysdep.h> |
19 | |
20 | @ This memchr routine is optimised on a Cortex-A9 and should work on all ARMv7 |
21 | @ and ARMv6T2 processors. It has a fast path for short sizes, and has an |
22 | @ optimised path for large data sets; the worst case is finding the match early |
23 | @ in a large data set. |
24 | @ Note: The use of cbz/cbnz means it's Thumb only |
25 | |
26 | @ 2011-07-15 david.gilbert@linaro.org |
27 | @ Copy from Cortex strings release 21 and change license |
28 | @ http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/view/head:/src/linaro-a9/memchr.S |
29 | @ Change function declarations/entry/exit |
30 | @ 2011-12-01 david.gilbert@linaro.org |
31 | @ Add some fixes from comments received (including use of ldrd instead ldm) |
32 | @ 2011-12-07 david.gilbert@linaro.org |
33 | @ Removed cbz from align loop - can't be taken |
34 | |
35 | @ this lets us check a flag in a 00/ff byte easily in either endianness |
36 | #ifdef __ARMEB__ |
37 | #define CHARTSTMASK(c) 1<<(31-(c*8)) |
38 | #else |
39 | #define CHARTSTMASK(c) 1<<(c*8) |
40 | #endif |
41 | .syntax unified |
42 | |
43 | .text |
44 | .thumb |
45 | .thumb_func |
46 | .global memchr |
47 | .type memchr,%function |
48 | ENTRY(memchr) |
49 | @ r0 = start of memory to scan |
50 | @ r1 = character to look for |
51 | @ r2 = length |
52 | @ returns r0 = pointer to character or NULL if not found |
53 | and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char |
54 | |
55 | cmp r2,#16 @ If it's short don't bother with anything clever |
56 | blt 20f |
57 | |
58 | tst r0, #7 @ If it's already aligned skip the next bit |
59 | beq 10f |
60 | |
61 | @ Work up to an aligned point |
62 | 5: |
63 | ldrb r3, [r0],#1 |
64 | subs r2, r2, #1 |
65 | cmp r3, r1 |
66 | beq 50f @ If it matches exit found |
67 | tst r0, #7 |
68 | bne 5b @ If not aligned yet then do next byte |
69 | |
70 | 10: |
71 | @ At this point, we are aligned, we know we have at least 8 bytes to work with |
72 | push {r4,r5,r6,r7} |
73 | cfi_adjust_cfa_offset (16) |
74 | cfi_rel_offset (r4, 0) |
75 | cfi_rel_offset (r5, 4) |
76 | cfi_rel_offset (r6, 8) |
77 | cfi_rel_offset (r7, 12) |
78 | |
79 | cfi_remember_state |
80 | |
81 | orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes |
82 | orr r1, r1, r1, lsl #16 |
83 | bic r6, r2, #7 @ Number of double words to work with * 8 |
84 | mvns r7, #0 @ all F's |
85 | movs r3, #0 |
86 | |
87 | 15: |
88 | ldrd r4,r5, [r0],#8 |
89 | subs r6, r6, #8 |
90 | eor r4,r4, r1 @ Get it so that r4,r5 have 00's where the bytes match the target |
91 | eor r5,r5, r1 |
92 | uadd8 r4, r4, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 |
93 | sel r4, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION |
94 | uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 |
95 | sel r5, r4, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION |
96 | cbnz r5, 60f |
97 | bne 15b @ (Flags from the subs above) If not run out of bytes then go around again |
98 | |
99 | pop {r4,r5,r6,r7} |
100 | cfi_adjust_cfa_offset (-16) |
101 | cfi_restore (r4) |
102 | cfi_restore (r5) |
103 | cfi_restore (r6) |
104 | cfi_restore (r7) |
105 | |
106 | and r1,r1,#0xff @ Get r1 back to a single character from the expansion above |
107 | and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done |
108 | |
109 | 20: |
110 | cbz r2, 40f @ 0 length or hit the end already then not found |
111 | |
112 | 21: @ Post aligned section, or just a short call |
113 | ldrb r3,[r0],#1 |
114 | subs r2,r2,#1 |
115 | eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub |
116 | cbz r3, 50f |
117 | bne 21b @ on r2 flags |
118 | |
119 | 40: |
120 | movs r0,#0 @ not found |
121 | DO_RET(lr) |
122 | |
123 | 50: |
124 | subs r0,r0,#1 @ found |
125 | DO_RET(lr) |
126 | |
127 | 60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was |
128 | @ r0 points to the start of the double word after the one that was tested |
129 | @ r4 has the 00/ff pattern for the first word, r5 has the chained value |
130 | cfi_restore_state |
131 | cmp r4, #0 |
132 | itte eq |
133 | moveq r4, r5 @ the end is in the 2nd word |
134 | subeq r0,r0,#3 @ Points to 2nd byte of 2nd word |
135 | subne r0,r0,#7 @ or 2nd byte of 1st word |
136 | |
137 | @ r0 currently points to the 2nd byte of the word containing the hit |
138 | tst r4, # CHARTSTMASK(0) @ 1st character |
139 | bne 61f |
140 | adds r0,r0,#1 |
141 | tst r4, # CHARTSTMASK(1) @ 2nd character |
142 | ittt eq |
143 | addeq r0,r0,#1 |
144 | tsteq r4, # (3<<15) @ 2nd & 3rd character |
145 | @ If not the 3rd must be the last one |
146 | addeq r0,r0,#1 |
147 | |
148 | 61: |
149 | pop {r4,r5,r6,r7} |
150 | cfi_adjust_cfa_offset (-16) |
151 | cfi_restore (r4) |
152 | cfi_restore (r5) |
153 | cfi_restore (r6) |
154 | cfi_restore (r7) |
155 | |
156 | subs r0,r0,#1 |
157 | DO_RET(lr) |
158 | |
159 | END(memchr) |
160 | libc_hidden_builtin_def (memchr) |
161 | |