/* strnlen - calculate the length of a string with limit.

   Copyright (C) 2013-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Arguments and return value.  result aliases srcin (both x0), so
   result is only written once srcin is no longer needed.  */
#define srcin		x0
#define cntin		x1
#define result		x0

/* Scratch registers.  shift, wtmp and tmp all name x4/w4; their live
   ranges in the code below do not overlap.  */
#define src		x2
#define synd		x3
#define shift		x4
#define wtmp		w4
#define tmp		x4
#define cntrem		x5

#define qdata		q0
#define vdata		v0
#define vhas_chr	v1
#define vrepmask	v2
#define vend		v3
#define dend		d3

/*
   Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte is NUL.
   Bits 4-7 must be zero.  Bits 4-7 are set likewise for odd bytes so that
   adjacent bytes can be merged.  Since the bits in the syndrome reflect the
   order in which things occur in the original string, counting trailing
   zeros identifies exactly which byte matched.  */

/* size_t __strnlen (const char *srcin, size_t cntin)

   In:   x0 = srcin, x1 = cntin.
   Out:  x0 = offset from srcin of the first NUL byte, or cntin if that
	 offset is not smaller than cntin.
   Uses x2-x5, v0-v3 and the condition flags as scratch; the stack is
   not touched.  */

ENTRY (__strnlen)
	/* PTR_ARG and SIZE_ARG are provided by <sysdep.h>; presumably they
	   normalise pointer/size arguments for ILP32 - see that header.  */
	PTR_ARG (0)
	SIZE_ARG (1)
	bic	src, srcin, 15		/* src = srcin rounded down to 16.  */
	mov	wtmp, 0xf00f		/* 0x0f for even bytes, 0xf0 for odd.  */
	cbz	cntin, L(nomatch)	/* Limit of zero: return 0, no load.  */
	ld1	{vdata.16b}, [src], 16	/* First aligned chunk; src += 16.  */
	dup	vrepmask.8h, wtmp
	cmeq	vhas_chr.16b, vdata.16b, 0	/* 0xff in every NUL byte.  */
	/* Four syndrome bits per byte.  Only the low six bits of a register
	   shift amount are used, i.e. (srcin & 15) * 4 here.  */
	lsl	shift, srcin, 2
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift	/* Drop the bytes that precede srcin.  */
	cbz	synd, L(start_loop)	/* No NUL in the first chunk.  */
L(finish):
	/* rbit + clz counts trailing zeros; dividing by four turns the bit
	   position into the byte offset from srcin.  */
	rbit	synd, synd
	clz	synd, synd
	lsr	result, synd, 2
	cmp	cntin, result
	csel	result, cntin, result, ls	/* Unsigned min with cntin.  */
	ret

L(start_loop):
	/* tmp = number of string bytes covered by the first chunk (1..16).
	   If the limit does not extend past them, return cntin, since that
	   chunk held no NUL.  Otherwise cntrem = bytes still to examine.  */
	sub	tmp, src, srcin
	subs	cntrem, cntin, tmp
	b.ls	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk.  The loop
	   loads two chunks per iteration and only tests the limit after the
	   second one.  Bit 4 of cntrem + 15 is the low bit of the number of
	   chunks still needed; when that number is odd, enter at the second
	   load.  */
	add	tmp, cntrem, 15
	tbnz	tmp, 4, L(loop32_2)

	.p2align 5
L(loop32):
	ldr	qdata, [src], 16
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Non-zero if and only if some byte of the chunk is NUL.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)
L(loop32_2):
	ldr	qdata, [src], 16
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Limit reached within the chunks loaded so far.  L(end) derives the
	   syndrome from vhas_chr itself, so umaxp can be skipped here.  */
	b.ls	L(end)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)

L(end):
	/* Build the four-bits-per-byte syndrome for the last chunk loaded.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	sub	src, src, 16		/* Undo the post-increment.  */
	mov	synd, vend.d[0]
	sub	result, src, srcin	/* Offset of that chunk from srcin.  */
	/* The bit reversal is only applied on little-endian; presumably
	   "ldr q" on big-endian already leaves the first byte in the top
	   syndrome bits - confirm against the Arm ARM.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* clz yields 64 (sixteen bytes) when the chunk holds no NUL; the
	   clamp below then returns cntin.  */
	clz	synd, synd
	add	result, result, synd, lsr 2
	cmp	cntin, result
	csel	result, cntin, result, ls	/* Unsigned min with cntin.  */
	ret

L(nomatch):
	/* No NUL within the first cntin bytes (or cntin is zero).  */
	mov	result, cntin
	ret

END (__strnlen)
libc_hidden_def (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_def (strnlen)