Warning: That file was not part of the compilation database. It may have many parsing errors.

1/* memchr - find a character in a memory zone
2
3 Copyright (C) 2015-2019 Free Software Foundation, Inc.
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
19 <http://www.gnu.org/licenses/>. */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64
26 * Neon Available.
27 */
28
/* Default name of the entry point.  The guard means a definition of MEMCHR
   made before this point takes precedence over __memchr.  */
29#ifndef MEMCHR
30# define MEMCHR __memchr
31#endif
32
33/* Arguments and results. result shares x0 with srcin, so srcin is gone
   once result has been written. */
34#define srcin x0 /* in: address of the first byte to examine */
35#define chrin w1 /* in: byte value to look for (replicated with dup .16b) */
36#define cntin x2 /* in: byte count; later reused as the count still to scan */
37
38#define result x0 /* out: address of the match, or 0 */
39
40#define src x3 /* 32-byte aligned read cursor, post-incremented by each ld1 */
41#define tmp x4 /* scratch for count adjustment and shift amounts */
42#define wtmp2 w5 /* holds the 0x40100401 mask constant before it is replicated */
43#define synd x6 /* 64-bit syndrome (or fast any-match value inside the loop) */
44#define soff x9 /* srcin & 31: offset of srcin inside its aligned block */
45#define cntrem x10 /* original cntin & 31, used to mask the last block */
46
47#define vrepchr v0 /* chrin replicated into all 16 byte lanes */
48#define vdata1 v1 /* first 16 bytes of the current 32-byte block */
49#define vdata2 v2 /* second 16 bytes of the current 32-byte block */
50#define vhas_chr1 v3 /* cmeq result for vdata1 */
51#define vhas_chr2 v4 /* cmeq result for vdata2 */
52#define vrepmask v5 /* 0x40100401 replicated into every 32-bit lane */
53#define vend v6 /* reduction of vhas_chr1/vhas_chr2 */
54
55/*
56 * Core algorithm:
57 *
58 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
59 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
60 * requested character and bit 1 is not used (faster than using a 32-bit
61 * syndrome). Since the bits in the syndrome reflect exactly the order in which
62 * things occur in the original string, counting trailing zeros allows us to
63 * identify exactly which byte has matched.
64 */
65
/*
 * MEMCHR (srcin, chrin, cntin)
 *
 * Search the cntin bytes starting at srcin for the first byte equal to the
 * low byte of chrin.
 *
 * In:     x0 = srcin, w1 = chrin, x2 = cntin
 * Out:    x0 = address of the first matching byte, or 0 when there is no
 *         match (including cntin == 0).
 * Writes: x0, x2, x3, x4, x5, x6, x9, x10, v0-v6 and the condition flags.
 *
 * Data is always loaded as whole 32-byte aligned blocks. Bytes of the first
 * and last block that lie outside [srcin, srcin + cntin) are therefore read,
 * but their syndrome bits are cleared before the syndrome is examined.
 *
 * Several branches consume flags that were set many instructions earlier
 * (the adds in the unaligned prologue, the subs in the loop). Do not insert
 * a flag-setting instruction between them.
 */
66ENTRY (MEMCHR)
67 /* Do not dereference srcin if no bytes to compare. */
68 cbz cntin, L(zero_length)
69 /*
70 * Magic constant 0x40100401 allows us to identify which lane matches
71 * the requested byte.
 * Replicated into every 32-bit lane of vrepmask, it gives the four
 * bytes of each lane the values 0x01, 0x04, 0x10 and 0x40: one
 * distinct even bit per byte. ANDing a cmeq result with it and
 * pairwise-adding twice therefore packs 32 compare results into
 * 64 bits without carries, two bits per byte.
72 */
73 mov wtmp2, #0x0401
74 movk wtmp2, #0x4010, lsl #16
75 dup vrepchr.16b, chrin /* low byte of chrin in all 16 lanes */
76 /* Work with aligned 32-byte chunks */
77 bic src, srcin, #31
78 dup vrepmask.4s, wtmp2
79 ands soff, srcin, #31 /* Z is set when srcin is already aligned */
80 and cntrem, cntin, #31 /* plain and: keeps the flags from ands */
81 b.eq L(loop) /* aligned start: no partial first block */
82
83 /*
84 * Input string is not 32-byte aligned. We calculate the syndrome
85 * value for the aligned 32 bytes block containing the first bytes
86 * and mask the irrelevant part.
87 */
88
89 ld1 {vdata1.16b, vdata2.16b}, [src], #32
90 sub tmp, soff, #32
 /*
 * cntin -= (32 - soff), the number of requested bytes this block
 * can hold. The flags set here are tested by the b.ls below: no
 * carry or a zero result means the request ends inside this block.
 */
91 adds cntin, cntin, tmp
92 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
93 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
94 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
95 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
96 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
97 addp vend.16b, vend.16b, vend.16b /* 128->64 */
98 mov synd, vend.2d[0]
99 /* Clear the soff*2 lower bits, i.e. the bytes that precede srcin */
100 lsl tmp, soff, #1
101 lsr synd, synd, tmp
102 lsl synd, synd, tmp
103 /* The first block can also be the last (flags are still those of the
 adds above; nothing in between writes them) */
104 b.ls L(masklast)
105 /* Have we found something already? */
106 cbnz synd, L(tail)
107
108L(loop):
109 ld1 {vdata1.16b, vdata2.16b}, [src], #32
110 subs cntin, cntin, #32 /* flags reused by b.ls here and b.hi in L(end) */
111 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
112 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
113 /* If we're out of data we finish regardless of the result */
114 b.ls L(end)
115 /* Use a fast check for the termination condition: synd is non-zero
 iff any of the 32 bytes matched. The lanes are 0x00 or 0xff, so
 adding the two 64-bit halves cannot wrap around to zero. */
116 orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
117 addp vend.2d, vend.2d, vend.2d
118 mov synd, vend.2d[0]
119 /* We're not out of data, loop if we haven't found the character */
120 cbz synd, L(loop)
121
122L(end):
123 /* Termination condition found, let's calculate the syndrome value */
124 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
125 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
126 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
127 addp vend.16b, vend.16b, vend.16b /* 128->64 */
128 mov synd, vend.2d[0]
129 /* Only do the clear for the last possible block. The flags are those
 of the subs in L(loop): hi means data remains after this block,
 so we got here because a match was seen and every byte of the
 block is inside the request. */
130 b.hi L(tail)
131
132L(masklast):
133 /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits, i.e. the
 bytes of this block that lie past the end of the request. When
 (cntrem + soff) % 32 is 0 the computed amount is 64; a register
 shift uses the amount modulo 64, so nothing is cleared and the
 whole block stays valid. */
134 add tmp, cntrem, soff
135 and tmp, tmp, #31
136 sub tmp, tmp, #32
137 neg tmp, tmp, lsl #1
138 lsl synd, synd, tmp
139 lsr synd, synd, tmp
140
141L(tail):
142 /* Count the trailing zeros using bit reversing */
143 rbit synd, synd
144 /* Compensate the last post-increment */
145 sub src, src, #32
146 /* Check that we have found a character (rbit maps zero to zero, so
 this still tests whether the syndrome was empty) */
147 cmp synd, #0
148 /* And count the leading zeros */
149 clz synd, synd
150 /* Compute the potential result: two syndrome bits per byte, hence
 the shift right by one to turn the bit index into a byte offset */
151 add result, src, synd, lsr #1
152 /* Select result or NULL (eq comes from the cmp above; clz and add do
 not write the flags) */
153 csel result, xzr, result, eq
154 ret
155
156L(zero_length):
 /* cntin was 0: return NULL without touching memory */
157 mov result, #0
158 ret
159END (MEMCHR)
/* NOTE(review): weak_alias and libc_hidden_builtin_def are macros defined
   outside this file; presumably they publish MEMCHR under the name memchr.
   Confirm against their definitions.  */
160weak_alias (MEMCHR, memchr)
161libc_hidden_builtin_def (memchr)
162

Warning: That file was not part of the compilation database. It may have many parsing errors.