/* strchr - find a character in a string

   Copyright (C) 2014-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Register roles.  Several aliases deliberately share a physical
   register because their live ranges do not overlap.  */

/* Arguments and return value.  */
#define srcin		x0	/* In:  pointer to the string.  */
#define chrin		w1	/* In:  character to search for.  */
#define result		x0	/* Out: same register as srcin.  */

/* General-purpose scratch.  */
#define src		x2	/* Address of the 16-byte chunk being examined.  */
#define tmp1		x1	/* Syndrome / bit index.  Same register as chrin,
				   which is consumed by the first dup in strchr
				   before tmp1 is written.  */
#define wtmp2		w3	/* Holds the 16-bit mask constants.  */
#define tmp3		x3	/* Shift amount.  Same register as wtmp2, written
				   only after both masks have been broadcast.  */

/* Vector registers.  */
#define vrepchr		v0	/* chrin replicated into all 16 byte lanes.  */
#define vdata		v1	/* Current 16 bytes of string data.  */
#define qdata		q1	/* 128-bit view of vdata, used by ldr.  */
#define vhas_nul	v2	/* Per-byte NUL compare result, later the merged syndrome.  */
#define vhas_chr	v3	/* Per-byte character-match compare result.  */
#define vrepmask	v4	/* 0x3003 per halfword: selects the match bits.  */
#define vrepmask2	v5	/* 0xf00f per halfword: keeps one nibble per byte.  */
#define vend		v6	/* Pairwise-reduced result.  */
#define dend		d6	/* Low 64 bits of vend.  */

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-1 are set if the relevant byte matched the
   requested character, bits 2-3 are set if the byte is NUL (or matched), and
   bits 4-7 are not used and must be zero.  Odd bytes use bits 4-7 in the same
   way (with bits 0-3 zero) so that adjacent bytes can be merged.  Since the
   bits in the syndrome reflect the order in which things occur in the original
   string, counting trailing zeros identifies exactly which byte matched.  */

/* char *strchr (const char *s, int c)

   In:   x0 (srcin) = s, w1 (chrin) = c.  Only the low byte of c takes part
	 in the comparison, because it is broadcast into byte lanes.
   Out:  x0 (result) = address of the first byte equal to c, or 0 if the
	 NUL terminator is reached first.  If the low byte of c is 0, the
	 address of the terminator is returned.
   Uses: x1-x3, v0-v6 and the condition flags as scratch; no stack.  */
ENTRY (strchr)
	PTR_ARG (0)			/* Macro from sysdep.h, applied to argument 0.  */
	bic	src, srcin, 15		/* src = srcin rounded down to 16 bytes.  */
	dup	vrepchr.16b, chrin	/* Last use of chrin; x1 is reused as tmp1.  */
	/* The first chunk is loaded from the aligned address, so it may
	   include bytes that precede srcin; they are discarded below.
	   ld1 with a .16b arrangement puts memory byte i in lane i, so
	   this path contains no endian-specific code.  */
	ld1	{vdata.16b}, [src]
	mov	wtmp2, 0x3003
	dup	vrepmask.8h, wtmp2	/* Even lanes 0x03, odd lanes 0x30.  */
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	mov	wtmp2, 0xf00f
	dup	vrepmask2.8h, wtmp2	/* Even lanes 0x0f, odd lanes 0xf0.  */

	/* Build the syndrome: insert the match bits into the low two bits
	   of each nibble, keep the NUL bits in the high two, drop the
	   unused nibble of every byte, then add adjacent bytes so that
	   each byte of the string is described by one nibble.  */
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	/* tmp3 = 4 * srcin.  The variable shift below uses only the low
	   six bits, i.e. 4 * (srcin & 15): the number of syndrome bits
	   that belong to bytes before the start of the string.  */
	lsl	tmp3, srcin, 2
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */

	fmov	tmp1, dend
	lsr	tmp1, tmp1, tmp3	/* Discard the bytes before srcin.  */
	cbz	tmp1, L(loop)		/* Nothing found in the first chunk.  */

	/* Count trailing zeros: tmp1 becomes the bit index of the first
	   hit, measured from srcin, four bits per byte.  */
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	/* Bit 1 of tmp1 is clear (the index is a multiple of 4) if the
	   target character was found first.  Otherwise the first hit is
	   the NUL terminator and the result is a null pointer.  */
	tst	tmp1, 2
	add	result, srcin, tmp1, lsr 2
	csel	result, result, xzr, eq
	ret

	.p2align 4
L(loop):
	/* Advance src by 16 (pre-index with writeback) and load the next
	   aligned chunk.  */
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* has_chr >= data (unsigned) holds when the byte is NUL, or when
	   it matched (has_chr is then 0xff): one compare flags both.  */
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	/* Reduce 128->64 so that a single fmov/cbz tests the whole chunk.  */
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)

	/* A hit is somewhere in this chunk: build the exact syndrome.  */
#ifdef __AARCH64EB__
	/* On big-endian, ldr of a q register places the byte at the lowest
	   address in the most significant lane.  bif puts the match bits
	   in the high two bits of each nibble instead, and leading zeros
	   are counted directly, so no rbit is needed.  */
	bif	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	fmov	tmp1, dend
#else
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	fmov	tmp1, dend
	rbit	tmp1, tmp1		/* With the clz below: count trailing zeros.  */
#endif
	clz	tmp1, tmp1
	/* Bit 1 of tmp1 is clear (the index is a multiple of 4) if the
	   target character was found first.  Otherwise the first hit is
	   the NUL terminator and the result is a null pointer.  */
	tst	tmp1, 2
	add	result, src, tmp1, lsr 2
	csel	result, result, xzr, eq
	ret

END (strchr)
libc_hidden_builtin_def (strchr)
weak_alias (strchr, index)


/* Source: glibc/sysdeps/aarch64/strchr.S  */