strnlen.S source code [glibc/sysdeps/aarch64/strnlen.S]

/* strnlen - calculate the length of a string with limit.

   Copyright (C) 2013-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
20
21	#include <sysdep.h>
22
/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */
28
/* Symbolic register names used by __strnlen.  Several names deliberately
   share one physical register; the sharing is noted on each line.  */

/* Arguments and return value.  */
#define srcin		x0	/* In: pointer to the string.  */
#define cntin		x1	/* In: maximum number of bytes to examine.  */
#define result		x0	/* Out: length; same register as srcin.  */

/* General-purpose scratch registers.  */
#define src		x2	/* Chunk pointer, advanced 16 bytes per load.  */
#define synd		x3	/* 64-bit syndrome, four bits per byte.  */
#define shift		x4	/* Syndrome shift for the first chunk.  */
#define wtmp		w4	/* 32-bit view of x4 (holds the 0xf00f mask).  */
#define tmp		x4	/* Short-lived temporary; same register as shift.  */
#define cntrem		x5	/* Bytes of the limit still to be covered.  */

/* SIMD registers.  */
#define qdata		q0	/* Chunk data, 128-bit view (ldr).  */
#define vdata		v0	/* Chunk data, vector view of the same register.  */
#define vhas_chr	v1	/* Per-byte compare result against NUL.  */
#define vrepmask	v2	/* 0xf00f replicated into every halfword.  */
#define vend		v3	/* Pairwise-reduced compare result.  */
#define dend		d3	/* Low 64 bits of vend, moved into synd.  */
46
/*
   Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte is NUL
   (strnlen has no other character to look for).  Bits 4-7 must be zero.
   Bits 4-7 are set likewise for odd bytes so that adjacent bytes can be
   merged.  Since the bits in the syndrome reflect the order in which things
   occur in the original string, counting trailing zeros identifies exactly
   which byte matched.  */
56
/* size_t __strnlen (const char *s, size_t maxlen)

   In:    srcin (x0) = s, cntin (x1) = maxlen.
   Out:   result (x0) = offset of the first NUL byte from s, or maxlen if
	  no NUL occurs within the first maxlen bytes.
   Uses:  x2-x5, v0-v3 and the condition flags; the stack is not touched.

   The string is read in 16-byte chunks starting at s rounded down to a
   16-byte boundary.  The first chunk is handled separately so that bytes
   in front of s can be discarded; the remaining chunks are handled by a
   loop that performs two loads per iteration.  */
ENTRY (__strnlen)
	/* PTR_ARG and SIZE_ARG come from sysdep.h.  NOTE(review): presumably
	   they normalise the pointer and size arguments on ILP32 builds;
	   confirm against their definitions.  */
	PTR_ARG (0)
	SIZE_ARG (1)
	bic	src, srcin, 15		/* src = srcin rounded down to 16.  */
	mov	wtmp, 0xf00f		/* Nibble mask, replicated below.  */
	cbz	cntin, L(nomatch)	/* Limit of 0: return without loading.  */
	ld1	{vdata.16b}, [src], 16	/* Aligned chunk that contains srcin.  */
	dup	vrepmask.8h, wtmp
	cmeq	vhas_chr.16b, vdata.16b, 0	/* 0xff in every NUL lane.  */
	lsl	shift, srcin, 2		/* Four syndrome bits per byte.  */
	/* Keep the low nibble of even lanes and the high nibble of odd lanes
	   so that the pairwise add merges two lanes into one byte.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
	fmov	synd, dend
	/* Discard the nibbles of the bytes that precede srcin in the chunk.
	   LSR by register uses the amount modulo 64, i.e. 4 * (srcin & 15).  */
	lsr	synd, synd, shift
	cbz	synd, L(start_loop)
L(finish):
	/* A NUL lies in the first chunk.  rbit followed by clz counts the
	   trailing zero bits; dividing by four gives the byte offset from
	   srcin.  */
	rbit	synd, synd
	clz	synd, synd
	lsr	result, synd, 2
	cmp	cntin, result
	csel	result, cntin, result, ls	/* result = min (cntin, result).  */
	ret

L(start_loop):
	/* tmp = number of string bytes covered by the first chunk.  */
	sub	tmp, src, srcin
	subs	cntrem, cntin, tmp
	b.ls	L(nomatch)		/* Limit ends inside the first chunk.  */

	/* Make sure that it won't overread by a 16-byte chunk.  The loop tests
	   the remaining count only after its second load, so when bit 4 of
	   cntrem + 15 is set (an odd number of 16-byte chunks is left) the
	   loop is entered at the second load.  */
	add	tmp, cntrem, 15
	tbnz	tmp, 4, L(loop32_2)

	.p2align 5
L(loop32):
	ldr	qdata, [src], 16
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* A pairwise maximum is enough to tell whether any lane matched.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)
L(loop32_2):
	ldr	qdata, [src], 16
	subs	cntrem, cntrem, 32	/* Two chunks accounted for per lap.  */
	cmeq	vhas_chr.16b, vdata.16b, 0	/* Leaves the flags untouched.  */
	b.ls	L(end)			/* Limit ends within this chunk.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)

L(end):
	/* vhas_chr holds the compare result of the chunk just loaded; it was
	   reached either because that chunk has a NUL or because the limit
	   ends within it.  Build the nibble syndrome as for the first chunk.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	sub	src, src, 16		/* Undo the post-increment.  */
	mov	synd, vend.d[0]
	sub	result, src, srcin	/* Offset of this chunk from srcin.  */
	/* Big-endian builds skip the bit reversal.  NOTE(review): presumably
	   because ldr q yields the lanes in the opposite order there, unlike
	   the ld1 used for the first chunk; confirm.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* With no NUL in the chunk synd is zero, clz gives 64 (16 bytes) and
	   the csel below clamps the result to cntin.  */
	clz	synd, synd
	add	result, result, synd, lsr 2
	cmp	cntin, result
	csel	result, cntin, result, ls	/* result = min (cntin, result).  */
	ret

L(nomatch):
	mov	result, cntin		/* No NUL within the limit.  */
	ret

END (__strnlen)
/* Symbol exports for the routine defined as __strnlen.  libc_hidden_def and
   weak_alias are glibc macros that are not defined in this file.
   NOTE(review): presumably they provide the hidden internal definitions
   and make strnlen a weak alias of __strnlen; confirm against the macro
   definitions.  */
libc_hidden_def (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_def (strnlen)
128

source code of glibc/sysdeps/aarch64/strnlen.S