/* strchrnul - find a character or nul in a string

   Copyright (C) 2014-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library. If not, see
   <https://www.gnu.org/licenses/>. */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Incoming arguments and the return value.  result shares x0 with
   srcin.  */
#define srcin		x0
#define chrin		w1
#define result		x0

/* General-purpose working registers.  src is the pointer to the current
   16-byte chunk.  tmp1 is the 64-bit view of the register that carries
   chrin, so it must not be written before chrin has been broadcast into
   vrepchr.  tmp2w is the 32-bit view of tmp2.  */
#define src		x2
#define tmp1		x1
#define tmp2		x3
#define tmp2w		w3

/* Vector working registers.  qdata is the 128-bit scalar view of vdata
   and dend is the low 64 bits of vend.  vhas_nul is defined but is not
   referenced by the code in this file.  */
#define vrepchr		v0
#define vdata		v1
#define qdata		q1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask	v4
#define vend		v5
#define dend		d5

/* Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the byte matched the
   requested character or is NUL, and bits 4-7 are zero.  For odd bytes it is
   the other way round: bits 4-7 are set on a match and bits 0-3 are zero, so
   that each pair of adjacent bytes can be merged into one syndrome byte.
   Since the bits in the syndrome reflect the order in which things occur in
   the original string, counting trailing zeros identifies exactly which byte
   matched.  */

/* __strchrnul
   In:   srcin (x0) = address of the string
	 chrin (w1) = character to find; only the low byte is used
   Out:  result (x0) = address of the first byte that equals the character
	 or is NUL, whichever comes first

   The string is read in 16-byte chunks at 16-byte-aligned addresses: the
   first chunk is the aligned chunk that contains srcin, and each later
   chunk is at src + 16.  x1-x3 and v0-v5 are used as scratch.  */
ENTRY (__strchrnul)
	/* Macro from sysdep.h applied to argument 0 (the pointer argument).  */
	PTR_ARG (0)
	/* src = srcin rounded down to a 16-byte boundary.  */
	bic	src, srcin, 15
	/* Broadcast the low byte of chrin to all 16 byte lanes.  This is the
	   last use of chrin; its register is reused as tmp1 below.  */
	dup	vrepchr.16b, chrin
	ld1	{vdata.16b}, [src]
	/* vrepmask = 0x0f in even byte lanes, 0xf0 in odd byte lanes.  */
	mov	tmp2w, 0xf00f
	dup	vrepmask.8h, tmp2w
	/* cmeq yields 0xff where the data byte equals the character and 0
	   elsewhere.  The unsigned >= compare of that result against the
	   data then yields 0xff where the byte matched (0xff >= anything)
	   or where the data byte is NUL (0 >= 0).  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	/* tmp2 = 4 * srcin.  The register shift below only uses the shift
	   amount modulo 64, i.e. 4 * (srcin & 15): the number of syndrome
	   bits that belong to bytes located before srcin.  */
	lsl	tmp2, srcin, 2
	/* Keep one nibble per byte and merge adjacent bytes, giving the
	   64-bit syndrome in the low half of vend.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	tmp1, dend
	lsr	tmp1, tmp1, tmp2	/* Mask padding bits.  */
	cbz	tmp1, L(loop)		/* Nothing found in the first chunk.  */

	/* rbit + clz counts trailing zeros: four times the offset of the
	   first hit relative to srcin.  Unlike the loop exit below, the
	   rbit here is not conditional on __AARCH64EB__; this path loads
	   with ld1 into byte lanes rather than with ldr of a q register
	   (presumably so that the lane order does not depend on
	   endianness).  */
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	add	result, srcin, tmp1, lsr 2
	ret

	.p2align 4
L(loop):
	/* Pre-indexed load: src += 16, then load the chunk at the new src.  */
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	/* Pairwise maximum reduces the 16 result bytes to 64 bits that are
	   non-zero exactly when some byte matched or was NUL.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)

	/* A hit is in this chunk: build the nibble syndrome as for the
	   first chunk and locate the first set bit.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	tmp1, dend
	/* On little-endian builds rbit turns the clz into a count of
	   trailing zeros.  For __AARCH64EB__ the rbit is omitted and clz is
	   applied to the syndrome directly.  */
#ifndef __AARCH64EB__
	rbit	tmp1, tmp1
#endif
	clz	tmp1, tmp1
	/* Four syndrome bits per byte: result = src + bit index / 4.  */
	add	result, src, tmp1, lsr 2
	ret

END(__strchrnul)
weak_alias (__strchrnul, strchrnul)


/* Source: glibc/sysdeps/aarch64/strchrnul.S */