strlen.S source code [glibc/sysdeps/aarch64/strlen.S]

1	/* Copyright (C) 2012-2022 Free Software Foundation, Inc.
2
3	This file is part of the GNU C Library.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library. If not, see
17	<https://www.gnu.org/licenses/>. */
18
19	#include <sysdep.h>
20
21	/* Assumptions:
22	 *
23	 * ARMv8-a, AArch64, Advanced SIMD.
24	 * MTE compatible.
25	 */
26
27	#ifndef STRLEN	/* If STRLEN is not already defined, the entry point is named __strlen. */
28	# define STRLEN __strlen
29	#endif
30
31	#define srcin x0	/* Input string pointer; only read until result overwrites it. */
32	#define result x0	/* Return value; deliberately the same register as srcin. */
33
34	#define src x1	/* 16-byte-aligned address of the chunk being examined. */
35	#define synd x2	/* 64-bit syndrome moved out of the vector register file. */
36	#define tmp x3	/* Scratch; holds the clz count on the loop exit path. */
37	#define wtmp w3	/* 32-bit view of tmp; used to materialise the 0xf00f mask. */
38	#define shift x4	/* srcin << 2; shift count that drops syndrome bits of bytes before srcin. */
39
40	#define data q0	/* 128-bit view of register 0; destination of the ldr in the loop. */
41	#define vdata v0	/* Vector view of the same register; used by ld1 and cmeq. */
42	#define vhas_nul v1	/* Per-byte result of the compare against zero (later masked). */
43	#define vrepmask v2	/* 0xf00f replicated into every 16-bit lane. */
44	#define vend v3	/* Pairwise-reduced compare result. */
45	#define dend d3	/* Low 64 bits of vend; the part copied into synd. */
46
47	/* Core algorithm:
48
49	For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
50	per byte. For even bytes, bits 0-3 are set if the relevant byte is NUL and
51	bits 4-7 are zero. For odd bytes, bits 4-7 are set likewise and bits 0-3
52	are zero, so that adjacent bytes can be merged by a pairwise add. Since the
53	bits in the syndrome reflect the order in which bytes occur in the original
54	string, counting trailing zeros identifies exactly which byte is the first NUL. */
55
/* STRLEN (default name __strlen): return in result (x0) the number of
   bytes that precede the first NUL byte of the string whose address is
   passed in srcin (x0).

   The string is examined in 16-byte chunks at 16-byte-aligned addresses:
   the first chunk is the aligned block that contains srcin, and every
   loop iteration advances src by 16.  Because the first chunk may begin
   before srcin, the syndrome bits of those leading bytes are shifted out
   before testing.

   Registers written by the instructions below: x0-x4 and v0-v3.  */
56	ENTRY (STRLEN)
57	PTR_ARG (`0`)	/* Macro from <sysdep.h>. NOTE(review): presumably normalises pointer argument 0 - confirm against sysdep.h. */
58	bic src, srcin, `15`	/* src = srcin rounded down to a 16-byte boundary. */
59	mov wtmp, `0xf00f`	/* Nibble mask: low nibble for even byte lanes, high nibble for odd byte lanes. */
60	ld1 {vdata`.16b`}, [src]	/* Load the whole aligned chunk; it may start before srcin. */
61	dup vrepmask`.8h`, wtmp	/* Replicate the mask into all eight 16-bit lanes. */
62	cmeq vhas_nul`.16b`, vdata`.16b`, `0`	/* Each byte lane becomes 0xff where the data byte is zero, else 0x00. */
63	lsl shift, srcin, `2`	/* 4 syndrome bits per byte. The register-controlled lsr below uses the count modulo 64, i.e. (srcin & 15) * 4. */
64	and vhas_nul`.16b`, vhas_nul`.16b`, vrepmask`.16b`	/* Keep one nibble per byte so adjacent bytes cannot overlap when added. */
65	addp vend`.16b`, vhas_nul`.16b`, vhas_nul`.16b` / 128->64 /	/* Pairwise add merges adjacent bytes: 128-bit mask -> 64-bit syndrome in the low half. */
66	fmov synd, dend	/* Move the 64-bit syndrome to a general-purpose register. */
67	lsr synd, synd, shift	/* Discard the nibbles belonging to bytes that precede srcin. */
68	cbz synd, L(loop)	/* No NUL in the first chunk: scan the following chunks. */
69
70	rbit synd, synd	/* rbit followed by clz counts trailing zeros. Unlike the loop exit this is not guarded by __AARCH64EB__. NOTE(review): presumably because ld1 with byte elements fills lanes in address order on either endianness - confirm. */
71	clz result, synd
72	lsr result, result, `2`	/* 4 bits per byte: bit index / 4 is the byte offset from srcin, i.e. the length. */
73	ret
74
75	.p2align `5`	/* Align the loop entry to a 32-byte boundary. */
76	L(loop):
77	ldr data, [src, `16`]!	/* Pre-indexed: src += 16, then load the 16 bytes at the new src (still 16-byte aligned). */
78	cmeq vhas_nul`.16b`, vdata`.16b`, `0`	/* Same per-byte NUL test as for the first chunk. */
79	umaxp vend`.16b`, vhas_nul`.16b`, vhas_nul`.16b`	/* Pairwise max: the low 64 bits are non-zero iff some byte of the chunk was NUL. */
80	fmov synd, dend
81	cbz synd, L(loop)	/* Nothing found: next chunk. The exact position is only computed after the loop. */
82
83	and vhas_nul`.16b`, vhas_nul`.16b`, vrepmask`.16b`	/* A NUL is in the chunk at src: rebuild the 4-bit-per-byte syndrome as for the first chunk. */
84	addp vend`.16b`, vhas_nul`.16b`, vhas_nul`.16b` / 128->64 /	/* 128-bit mask -> 64-bit syndrome in the low half. */
85	sub result, src, srcin	/* Bytes before this chunk. This overwrites srcin (same register), which is not read again. */
86	fmov synd, dend
87	#ifndef __AARCH64EB__
88	rbit synd, synd	/* Little-endian: the first byte sits in the low bits, so reverse before clz. NOTE(review): on big-endian the q-register ldr presumably already places the first byte in the most significant bits - confirm. */
89	#endif
90	clz tmp, synd
91	add result, result, tmp, lsr `2`	/* Add the byte index of the NUL within the chunk (bit index / 4). */
92	ret
93
94	END (STRLEN)
95	weak_alias (STRLEN, strlen)	/* NOTE(review): glibc macro; presumably makes strlen a weak alias of STRLEN - confirm. */
96	libc_hidden_builtin_def (strlen)	/* NOTE(review): glibc macro; semantics are not visible in this file - confirm. */
97

source code of glibc/sysdeps/aarch64/strlen.S