memchr.S source code [glibc/sysdeps/arm/armv6t2/memchr.S]

/* Copyright (C) 2011-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
17
18	#include <sysdep.h>
19
20	@ This memchr routine is optimised on a Cortex-A9 and should work on all ARMv7
21	@ and ARMv6T2 processors. It has a fast path for short sizes, and has an
22	@ optimised path for large data sets; the worst case is finding the match early
23	@ in a large data set.
24	@ Note: The use of cbz/cbnz means it's Thumb only
25
@ 2011-07-15 david.gilbert@linaro.org
@    Copy from Cortex strings release 21 and change license
@    http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/view/head:/src/linaro-a9/memchr.S
@    Change function declarations/entry/exit
@ 2011-12-01 david.gilbert@linaro.org
@    Add some fixes from comments received (including use of ldrd instead of ldm)
@ 2011-12-07 david.gilbert@linaro.org
@    Removed cbz from align loop - can't be taken
34
/* CHARTSTMASK(c) is a one-bit mask that falls inside byte number c
   (counted in memory order) of a 32-bit word loaded from memory.  It is
   used on words whose bytes are each either 00 or ff, so testing any
   single bit of a byte tells whether that byte is set; choosing the bit
   per endianness lets the same test work on both.
   The argument and the whole expansion are parenthesised so the macro
   stays correct when given an expression or used inside one.  */
#ifdef __ARMEB__
#define CHARTSTMASK(c) (1 << (31 - ((c) * 8)))
#else
#define CHARTSTMASK(c) (1 << ((c) * 8))
#endif
@ ----------------------------------------------------------------------
@ memchr - find the first occurrence of a byte in a memory area.
@ Standard C prototype: void *memchr (const void *s, int c, size_t n)
@
@ In:    r0 = start of memory to scan
@        r1 = character to look for (only the low 8 bits are used)
@        r2 = length in bytes, treated as unsigned
@ Out:   r0 = pointer to the first matching byte, or NULL if not found
@ Uses:  r3 as scratch; r4-r7 are pushed and popped around the
@        doubleword loop; the flags are modified.
@
@ Layout: lengths below 16 use a plain byte loop.  Longer buffers are
@ walked byte-wise up to an 8-byte boundary, then scanned 8 bytes per
@ iteration, and the remaining 0-7 bytes are finished by the byte loop.
@ Thumb only, because of cbz/cbnz.
@ ----------------------------------------------------------------------
	.syntax unified

	.text
	.thumb
	.thumb_func
	.global memchr
	.type memchr,%function
ENTRY(memchr)
	and	r1,r1,#0xff	@ Keep only the low byte of the search value

	cmp	r2,#16		@ Short buffers go straight to the byte loop
	blo	20f		@ Unsigned test: r2 is a size_t, so a signed blt would
				@ send every length with the top bit set down the
				@ slow byte-at-a-time path

	tst	r0, #7		@ Already 8-byte aligned: skip the lead-in loop
	beq	10f

	@ Lead-in: step one byte at a time up to an 8-byte boundary.
	@ No length check is needed here: r2 was at least 16 on entry and
	@ at most 7 bytes are consumed before r0 becomes aligned.
5:
	ldrb	r3, [r0],#1
	subs	r2, r2, #1
	cmp	r3, r1
	beq	50f		@ Match: r0 is one past it, label 50 steps back
	tst	r0, #7
	bne	5b		@ Not aligned yet, so do the next byte

10:
	@ r0 is 8-byte aligned and at least 8 bytes remain in r2.
	push	{r4,r5,r6,r7}
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (r4, 0)
	cfi_rel_offset (r5, 4)
	cfi_rel_offset (r6, 8)
	cfi_rel_offset (r7, 12)

	cfi_remember_state	@ Recalled at label 60, reached with r4-r7 still pushed

	orr	r1, r1, r1, lsl #8	@ Replicate the search byte into all four bytes of r1
	orr	r1, r1, r1, lsl #16
	bic	r6, r2, #7	@ r6 = byte count covered by whole doublewords
	mvns	r7, #0		@ r7 = 0xffffffff
	movs	r3, #0		@ r3 = 0

	@ Main loop: test 8 bytes per iteration.
15:
	ldrd	r4,r5, [r0],#8
	subs	r6, r6, #8	@ Z flag is consumed by the bne at the loop bottom
	eor	r4,r4, r1	@ r4,r5 now hold 00 in every byte that matched
	eor	r5,r5, r1
	uadd8	r4, r4, r7	@ Add 0xff to each byte: GE bit set for every non-zero byte
	sel	r4, r3, r7	@ Per byte: 00 where GE is set (no match), ff where it matched
	uadd8	r5, r5, r7	@ Same GE computation for the second word
	sel	r5, r4, r7	@ Chained: take the r4 byte where GE is set, ff otherwise,
				@ so r5 is non-zero if either word held a match
	cbnz	r5, 60f
	bne	15b		@ Flags from the subs above: more doublewords to scan

	pop	{r4,r5,r6,r7}
	cfi_adjust_cfa_offset (-16)
	cfi_restore (r4)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)

	and	r1,r1,#0xff	@ Collapse r1 back to a single search byte
	and	r2,r2,#7	@ Bytes left over after the whole doublewords

20:
	cbz	r2, 40f		@ Zero length, or nothing left: not found

	@ Byte loop: used for short calls and for the 0-7 byte tail.
21:
	ldrb	r3,[r0],#1
	subs	r2,r2,#1
	eor	r3,r3,r1	@ r3 = 0 on a match; leaves the flags from subs intact
	cbz	r3, 50f
	bne	21b		@ Flags from the subs above: bytes remain

40:
	movs	r0,#0		@ Not found: return NULL
	DO_RET(lr)

50:
	subs	r0,r0,#1	@ Found: undo the post-increment of the byte load
	DO_RET(lr)

60:	@ The doubleword loop saw a hit; work out which byte it was.
	@ r0 points just past the doubleword that was tested.
	@ r4 holds the 00/ff pattern of the first word, r5 the chained value.
	cfi_restore_state
	cmp	r4, #0
	itte	eq
	moveq	r4, r5		@ No hit in the first word: use the second word pattern
	subeq	r0,r0,#3	@ r0 = address of the 2nd byte of the 2nd word
	subne	r0,r0,#7	@ r0 = address of the 2nd byte of the 1st word

	@ Invariant: r0 is kept one byte past the candidate byte, and the
	@ subs at label 61 removes that extra one.
	tst	r4, # CHARTSTMASK(0)	@ 1st character
	bne	61f
	adds	r0,r0,#1
	tst	r4, # CHARTSTMASK(1)	@ 2nd character
	ittt	eq
	addeq	r0,r0,#1
	tsteq	r4, # (3<<15)	@ Bits 15 and 16 straddle the 2nd and 3rd bytes; the 2nd
				@ is known clear here, so this tests the 3rd in either
				@ endianness
	@ If the 3rd is clear too, the hit must be the last byte
	addeq	r0,r0,#1

61:
	pop	{r4,r5,r6,r7}
	cfi_adjust_cfa_offset (-16)
	cfi_restore (r4)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)

	subs	r0,r0,#1	@ Step back onto the matching byte
	DO_RET(lr)

END(memchr)
libc_hidden_builtin_def (memchr)
161

source code of glibc/sysdeps/arm/armv6t2/memchr.S