/* strchrnul - find a character or nul in a string

   Copyright (C) 2014-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
20 | |
21 | #include <sysdep.h> |
22 | |
/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */
28 | |
/* Register aliases.  Argument and result registers per AAPCS64.  */
#define srcin		x0	/* Incoming string pointer (1st argument).  */
#define chrin		w1	/* Character to search for (2nd argument).  */
#define result		x0	/* Returned pointer (aliases srcin).  */

#define src		x2	/* 16-byte-aligned cursor into the string.  */
#define tmp1		x1	/* Scratch: holds the 64-bit syndrome.  */
#define tmp2		x3	/* Scratch: padding-bit shift amount.  */
#define tmp2w		w3	/* 32-bit view of tmp2.  */

/* Vector register aliases.  */
#define vrepchr		v0	/* Search character replicated to all 16 lanes.  */
#define vdata		v1	/* Current 16-byte chunk of the string.  */
#define qdata		q1	/* 128-bit (q-register) view of vdata.  */
#define vhas_nul	v2	/* Not referenced below; presumably kept for
				   symmetry with strchr — TODO confirm.  */
#define vhas_chr	v3	/* Per-byte match mask (char or NUL).  */
#define vrepmask	v4	/* Constant 0xf00f replicated per halfword.  */
#define vend		v5	/* Folded 128->64 bit syndrome.  */
#define dend		d5	/* 64-bit (d-register) view of vend.  */
46 | |
/* Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte matched the
   requested character or the byte is NUL.  Bits 4-7 must be zero.  Bits 4-7
   are set likewise for odd bytes so that adjacent bytes can be merged.  Since
   the bits in the syndrome reflect the order in which things occur in the
   original string, counting trailing zeros identifies exactly which byte
   matched.  */
55 | |
/* char *__strchrnul (const char *s, int c)

   Return a pointer to the first occurrence of C in S, or to the
   terminating NUL if C is not found.
   In:   srcin (x0) = s, chrin (w1) = c (only the low byte is used).
   Out:  result (x0).
   Clobbers: x1-x3, v0, v1, v3-v5, flags untouched.  */
ENTRY (__strchrnul)
	PTR_ARG (0)
	/* Round srcin down to a 16-byte boundary and load the whole chunk
	   containing it.  The aligned load never crosses a 16-byte granule
	   boundary, which keeps the over-read MTE compatible.  */
	bic	src, srcin, 15
	dup	vrepchr.16b, chrin	/* Replicate c into all 16 lanes.  */
	ld1	{vdata.16b}, [src]
	mov	tmp2w, 0xf00f
	dup	vrepmask.8h, tmp2w	/* Per-halfword mask 0xf00f.  */
	/* 0xff in each byte equal to the search character...  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* ...then fold in NUL bytes: unsigned 0xff >= anything and
	   0x00 >= 0x00, so bytes stay 0xff exactly where the data byte
	   matched c or was NUL.  */
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	lsl	tmp2, srcin, 2		/* 4 syndrome bits per input byte; the
					   lsr below only uses the low 6 bits,
					   i.e. (srcin & 15) * 4.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	tmp1, dend		/* tmp1 = 64-bit syndrome.  */
	lsr	tmp1, tmp1, tmp2	/* Mask padding bits.  */
	cbz	tmp1, L(loop)

	/* Hit in the first chunk: count trailing zeros of the syndrome and
	   divide by 4 to get the byte offset relative to srcin.  */
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	add	result, srcin, tmp1, lsr 2
	ret

	.p2align 4
L(loop):
	/* Main loop, one aligned 16-byte chunk per iteration (pre-indexed
	   load advances src).  umaxp is a cheap "any byte nonzero?"
	   reduction; the precisely ordered syndrome is only rebuilt after
	   the loop exits.  */
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b	/* Fold in NUL bytes.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)

	/* Rebuild the ordered syndrome for the terminating chunk.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	tmp1, dend
#ifndef __AARCH64EB__
	/* NOTE(review): on big-endian the ldr above loads the quadword
	   byte-reversed, so the syndrome is already in clz order; only
	   little-endian needs the bit-reverse.  The first-chunk path uses
	   ld1 (element-ordered on both endiannesses), hence its
	   unconditional rbit — confirm on a big-endian build.  */
	rbit	tmp1, tmp1
#endif
	clz	tmp1, tmp1
	add	result, src, tmp1, lsr 2	/* Offset within this chunk.  */
	ret

END(__strchrnul)
weak_alias (__strchrnul, strchrnul)
98 | |