1/* Copyright (C) 2012-2022 Free Software Foundation, Inc.
2
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20
21/* Assumptions:
22 *
23 * ARMv8-a, AArch64, Advanced SIMD.
24 * MTE compatible.
25 */
26
/* Symbol name of the routine defined below.  A file that includes this one
   may pre-define STRLEN to build the same code under a different name.  */
#ifndef STRLEN
# define STRLEN __strlen
#endif

/* Register roles.  srcin and result both name x0: the incoming pointer is
   overwritten by the return value once it is no longer needed.  */
#define srcin		x0	/* In:  pointer to the start of the string.  */
#define result		x0	/* Out: computed length.  */

#define src		x1	/* 16-byte aligned address of the current chunk.  */
#define synd		x2	/* 64-bit syndrome: four bits per chunk byte.  */
#define tmp		x3	/* Scratch.  */
#define wtmp		w3	/* 32-bit view of tmp.  */
#define shift		x4	/* Shift amount applied to the first syndrome.  */

#define data		q0	/* Current chunk, 128-bit scalar view.  */
#define vdata		v0	/* Current chunk, vector view of the same register.  */
#define vhas_nul	v1	/* Per-byte compare-with-zero result.  */
#define vrepmask	v2	/* 0xf00f replicated into every halfword.  */
#define vend		v3	/* Pairwise-reduced compare result.  */
#define dend		d3	/* Low 64 bits of vend, moved to synd.  */
46
/* Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the byte is NUL; bits 4-7
   must be zero.  Bits 4-7 are set likewise for odd bytes so that adjacent
   bytes can be merged.  Since the bits in the syndrome reflect the order in
   which the bytes occur in the original string, counting trailing zeros and
   dividing by four identifies exactly which byte is the terminating NUL.  */
55
/* STRLEN: string length using Advanced SIMD, exported as strlen through the
   weak_alias at the end of this block.

   In:       x0 (srcin)  = pointer to the string.
   Out:      x0 (result) = number of bytes before the first NUL byte.
   Clobbers: x1-x4, v0-v3, and nothing else directly.  No stack is used and
	     no other function is called.

   The string is read in 16-byte aligned chunks.  The chunk containing the
   start of the string is handled separately so that bytes located before
   srcin can be discarded; every later chunk goes through L(loop).  */
ENTRY (STRLEN)
	/* PTR_ARG comes from sysdep.h and is not visible here; presumably it
	   normalises the pointer argument in x0 (e.g. for ILP32) - confirm.  */
	PTR_ARG (0)

	/* Round the start address down to a 16-byte boundary and load the
	   whole aligned chunk.  The bytes in front of srcin are read too and
	   are discarded further down.  */
	bic	src, srcin, 15
	mov	wtmp, 0xf00f
	ld1	{vdata.16b}, [src]
	dup	vrepmask.8h, wtmp
	/* 0xff in every byte lane that holds a NUL, 0x00 elsewhere.  */
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* shift = srcin * 4.  A register-specified LSR only uses the low six
	   bits of the amount, so this acts as 4 * (srcin & 15): four syndrome
	   bits for every byte that precedes the start of the string.  */
	lsl	shift, srcin, 2
	/* Keep the low nibble of even lanes and the high nibble of odd lanes,
	   then add neighbouring lanes so each byte contributes one nibble.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	fmov	synd, dend
	/* Drop the nibbles that belong to bytes before srcin.  */
	lsr	synd, synd, shift
	/* No NUL between srcin and the end of this chunk: scan further.  */
	cbz	synd, L(loop)

	/* rbit + clz counts trailing zeros.  With four bits per byte, a
	   quarter of that count is the NUL offset from srcin, i.e. the
	   length.  This path has no big-endian special case: ld1 on byte
	   elements puts the lowest-addressed byte in lane 0 regardless of
	   endianness.  */
	rbit	synd, synd
	clz	result, synd
	lsr	result, result, 2
	ret

	.p2align 5
L(loop):
	/* Pre-indexed load: src is advanced by 16 first, then the chunk at
	   the new address is loaded.  */
	ldr	data, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* Pairwise maximum: the low 64 bits are non-zero iff some lane
	   matched.  The exact position is only worked out after the loop.  */
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)

	/* A NUL is somewhere in the current chunk.  Build the four-bits-per-
	   byte syndrome the same way as for the first chunk.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	/* Bytes between the start of the string and the current chunk.  */
	sub	result, src, srcin
	fmov	synd, dend
	/* Little-endian: the first byte of the chunk sits in the lowest
	   syndrome bits, so reverse the bits before counting leading zeros.
	   Big-endian: the 128-bit ldr above places the first byte in the
	   most significant lane, so the leading-zero count is already the
	   distance to the first NUL.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	tmp, synd
	/* Four syndrome bits per byte: add (bit index / 4) to the chunk
	   offset to obtain the length.  */
	add	result, result, tmp, lsr 2
	ret

END (STRLEN)
weak_alias (STRLEN, strlen)
libc_hidden_builtin_def (strlen)
97

source code of glibc/sysdeps/aarch64/strlen.S