/* memchr - find a character in a memory zone

   Copyright (C) 2015-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#ifndef MEMCHR
# define MEMCHR __memchr
#endif

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1
#define cntin		x2
#define result		x0

#define src		x3
#define cntrem		x4
#define synd		x5
#define shift		x6
#define tmp		x7
#define wtmp		w7

#define vrepchr		v0
#define qdata		q1
#define vdata		v1
#define vhas_chr	v2
#define vrepmask	v3
#define vend		v4
#define dend		d4

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte matched
   the requested character; bits 4-7 must be zero.  Bits 4-7 are set likewise
   for odd bytes so that adjacent bytes can be merged.  Since the bits in the
   syndrome reflect the order in which things occur in the original buffer,
   counting trailing zeros identifies exactly which byte matched.  */
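
/* For example, if the first matching byte in a chunk is at byte index 5,
   the lowest set bit of the syndrome is bit 20 (4 * 5).  rbit followed by
   clz recovers that bit position, and a further shift right by two divides
   it by four, giving the byte offset 5 to add to the chunk's address.  */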

ENTRY (MEMCHR)
	PTR_ARG (0)
	SIZE_ARG (2)
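	/* Work on the 16-byte aligned chunk that contains srcin.  Matches in
	   bytes before srcin are discarded from the syndrome by the shift
	   below (four syndrome bits per byte).  */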
	bic	src, srcin, 15
	cbz	cntin, L(nomatch)
	ld1	{vdata.16b}, [src]
	dup	vrepchr.16b, chrin
	mov	wtmp, 0xf00f
	dup	vrepmask.8h, wtmp
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	lsl	shift, srcin, 2
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift
	cbz	synd, L(start_loop)

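	/* A match was found in the first chunk.  Convert the syndrome to a
	   byte offset and return NULL if it lies beyond the cntin bytes the
	   caller allowed us to examine.  */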
	rbit	synd, synd
	clz	synd, synd
	add	result, srcin, synd, lsr 2
	cmp	cntin, synd, lsr 2
	csel	result, result, xzr, hi
	ret

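	/* No match in the first chunk.  Work out how many bytes remain past
	   it; if none do, there is no match.  */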
L(start_loop):
	sub	tmp, src, srcin
	add	tmp, tmp, 16
	subs	cntrem, cntin, tmp
	b.ls	L(nomatch)

	/* Choose the loop entry point so that it won't overread by a
	   16-byte chunk.  */
	add	tmp, cntrem, 15
	tbnz	tmp, 4, L(loop32_2)

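	/* Main loop: examine 32 bytes per iteration as two 16-byte chunks.
	   A cheap umaxp/fmov check detects whether any byte matched; the
	   full 4-bit-per-byte syndrome is only built at L(end).  */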
	.p2align 4
L(loop32):
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)

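	/* The second half of the loop also counts down the remaining bytes.
	   When they run out, or when a match is seen, control reaches L(end)
	   to check the last chunk that was loaded.  */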
L(loop32_2):
	ldr	qdata, [src, 16]!
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	b.ls	L(end)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
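	/* Either a match was seen or the byte count ran out.  Rebuild the
	   full 4-bit-per-byte syndrome for the last chunk and discard any
	   match that lies beyond the end of the buffer.  */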
L(end):
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	add	tmp, srcin, cntin
	sub	cntrem, tmp, src
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	synd, synd
	cmp	cntrem, synd, lsr 2
	add	result, src, synd, lsr 2
	csel	result, result, xzr, hi
	ret

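	/* No match within the first cntin bytes (or cntin was zero):
	   return NULL.  */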
L(nomatch):
	mov	result, 0
	ret

END (MEMCHR)
weak_alias (MEMCHR, memchr)
libc_hidden_builtin_def (memchr)
