1/* memchr - find a character in a memory zone using base integer registers
2
3 Copyright (C) 2018-2022 Free Software Foundation, Inc.
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
19 <https://www.gnu.org/licenses/>. */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64
26 * Use base integer registers.
27 */
28
29#ifndef MEMCHR
30# define MEMCHR __memchr_nosimd
31#endif
32
33/* Arguments and results. */
34#define srcin x0
35#define chrin x1
36#define cntin x2
37
38#define result x0
39
40#define repchr x1
41
42#define tmp1 x2
43#define tmp2 x3
44#define tmp3 x4
45#define tmp4 x5
46
47#define src x6
48#define srcend x7
49#define srcend16 x8
50
51#define anymore x9
52
53#define zeroones x10
54
55#define data1 x11
56#define data2 x12
57
58#define has_chr1 x13
59#define has_chr2 x14
60
61#define REP8_01 0x0101010101010101
62#define REP8_7f 0x7f7f7f7f7f7f7f7f
63
64
65ENTRY_ALIGN (MEMCHR, 6)
66
67 PTR_ARG (0)
68 SIZE_ARG (2)
69
70 /* Do not dereference srcin if no bytes to compare. */
71 cbz cntin, L(none_chr)
72
73 /* Start address is 16-byte aligned or not? */
74 tst srcin, 15
75 bic src, srcin, 15
76
77 mov zeroones, REP8_01
78 and repchr, chrin, 255
79 /* Generate a qword integer as |c|c|c|c|c|c|c|c|. */
80 mul repchr, repchr, zeroones
81
82 add srcend, srcin, cntin
83 /*
84 * srcend16 is address of the block following the last block.
85 *
86 * [A block is 16-byte aligned and sized.]
87 */
88 add srcend16, srcend, 15
89 bic srcend16, srcend16, 15
90
91 b.eq L(loop)
92
93 /* Load the first block containing start address. */
94 ldp data1, data2, [src], 16
95
96 lsl tmp1, srcin, 3
97 mov tmp2, ~0
98#ifdef __AARCH64EB__
99 lsr tmp3, tmp2, tmp1
100#else
101 lsl tmp3, tmp2, tmp1
102#endif
103 /* Start address is in the first or the second qword? */
104 tst srcin, 8
105
106 /*
107 * Transform any byte in the block to zero using XOR operation,
108 * if that byte equals the char to search. In this way, searching
109 * the char becomes detecting zero in the resulting two qwords.
110 */
111 eor data1, data1, repchr
112 eor data2, data2, repchr
113
114 /*
115 * Set those unused bytes(before start address) to 0xff, so
116 * that they will not hit any zero detection.
117 */
118 orn tmp1, data1, tmp3
119 orn tmp2, data2, tmp3
120
121 csinv data1, tmp1, xzr, eq
122 csel data2, data2, tmp2, eq
123
124 /*
125 * When the first and last block are the same, there are two cases:
126 * o. Memory range to search is just in one block.
127 * ( start address - end address) < 0
128 *
129 * o. Memory range is so large that end address wrap-around.
130 * ( start address - end address) > 0
131 */
132 cmp srcin, srcend
133 ccmp src, srcend16, 0, mi
134 csetm anymore, ne
135 b L(find_chr)
136
137 .p2align 4
138L(loop):
139 ldp data1, data2, [src], 16
140
141 subs anymore, src, srcend16
142
143 /*
144 * Transform any byte in the block to zero using XOR operation,
145 * if that byte equals the char to search.
146 */
147 eor data1, data1, repchr
148 eor data2, data2, repchr
149
150L(find_chr):
151 /*
152 * Use the following integer test to find out if any byte in a
153 * qword is zero. If do not contain zero-valued byte, test result
154 * is zero.
155 *
156 * (qword - 0x0101010101010101) & ~(qword) & 0x8080808080808080
157 * =
158 * (qword - 0x0101010101010101) & ~(qword | 0x7f7f7f7f7f7f7f7f)
159 *
160 */
161 sub tmp1, data1, zeroones
162 sub tmp2, data2, zeroones
163
164 orr tmp3, data1, REP8_7f
165 orr tmp4, data2, REP8_7f
166
167 bic has_chr1, tmp1, tmp3
168 bic has_chr2, tmp2, tmp4
169
170 orr tmp1, has_chr1, has_chr2
171 ccmp tmp1, 0, 0, ne
172
173 b.eq L(loop)
174
175 cbz has_chr1, 1f
176 sub result, src, 16
177#ifdef __AARCH64EB__
178 rev data1, data1
179#else
180 rev has_chr1, has_chr1
181#endif
182 b L(done)
183
1841: cbz has_chr2, L(none_chr)
185 sub result, src, 8
186#ifdef __AARCH64EB__
187 rev data1, data2
188#else
189 rev has_chr1, has_chr2
190#endif
191
192L(done):
193#ifdef __AARCH64EB__
194 /*
195 * For big-endian, can not directly use has_chr1/has_chr2 because
196 * two qwords has been reversed after loading from memory.
197 * Thus, have to perform char detection on two qwords again, which
198 * should be byte-swapped this time.
199 */
200 sub tmp1, data1, zeroones
201 orr tmp3, data1, REP8_7f
202 bic has_chr1, tmp1, tmp3
203 rev has_chr1, has_chr1
204#endif
205
206 /*
207 * If the specified char is found in a qword, the corresponding
208 * byte of in has_chr has value of 1, while this is only true for
209 * the first occurrence, not other occurrences.
210 */
211 cmp anymore, 0
212 clz tmp1, has_chr1
213 add result, result, tmp1, lsr 3
214 ccmp result, srcend, 8, eq /* NZCV = 8000 */
215 csel result, result, xzr, mi
216 ret
217
218L(none_chr):
219 mov result, 0
220 ret
221
222END (MEMCHR)
223libc_hidden_builtin_def (MEMCHR)
224

source code of glibc/sysdeps/aarch64/multiarch/memchr_nosimd.S