Warning: That file was not part of the compilation database. It may have many parsing errors.

1/* Strlen implementation that uses ASIMD instructions for load and NULL checks.
2 Copyright (C) 2018-2019 Free Software Foundation, Inc.
3
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21
22/* Assumptions:
23
24 ARMv8-a, AArch64, ASIMD, unaligned accesses, min page size 4k. */
25
/* To test the page crossing code path more thoroughly, compile with
   -DTEST_PAGE_CROSS - this shrinks MIN_PAGE_SIZE to 16, so every call
   whose pointer is not 16-byte aligned goes through the slower entry
   path.  This option is not intended for production use.  */
29
/* Argument and result.  The incoming pointer and the returned length
   share x0.  */
#define srcin		x0
#define len		x0

/* General-purpose scratch registers.  Note the deliberate aliasing:
   has_nul1/tmp1 both name x4 and has_nul2/tmp2 both name x5.  */
#define src		x1
#define data1		x2
#define data2		x3
#define has_nul1	x4
#define tmp1		x4
#define has_nul2	x5
#define tmp2		x5
#define tmp3		x6
#define tmp4		x7
#define zeroones	x8

/* SIMD scratch registers.  dataq/datav are the 128-bit and vector views
   of register 2; datab2/dataq2/datav2 are the byte, 128-bit and vector
   views of register 3.  */
#define dataq		q2
#define datav		v2
#define datab2		b3
#define dataq2		q3
#define datav2		v3

/* Page size assumed by the entry check that guards the first, possibly
   unaligned, 16-byte load.  TEST_PAGE_CROSS shrinks it to 16 for testing.  */
#ifndef TEST_PAGE_CROSS
# define MIN_PAGE_SIZE	4096
#else
# define MIN_PAGE_SIZE	16
#endif
56
57 /* Since strings are short on average, we check the first 16 bytes
58 of the string for a NUL character. In order to do an unaligned load
59 safely we have to do a page cross check first. If there is a NUL
60 byte we calculate the length from the 2 8-byte words using
61 conditional select to reduce branch mispredictions (it is unlikely
62 strlen_asimd will be repeatedly called on strings with the same
63 length).
64
   If the string is longer than 16 bytes, we align src so that we don't
   need further page cross checks, and process 16 bytes per iteration.
67
68 If the page cross check fails, we read 16 bytes from an aligned
69 address, remove any characters before the string, and continue
70 in the main loop using aligned loads. Since strings crossing a
71 page in the first 16 bytes are rare (probability of
72 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
73
74 AArch64 systems have a minimum page size of 4k. We don't bother
75 checking for larger page sizes - the cost of setting up the correct
76 page size is just not worth the extra gain from a small reduction in
77 the cases taking the slow path. Note that we only care about
78 whether the first fetch, which may be misaligned, crosses a page
79 boundary. */
80
/* Index of the 64-bit vector lane that holds the lower-addressed (first)
   and the higher-addressed (second) 8 bytes of a 16-byte `ldr q' load.
   A big-endian `ldr q' byte-reverses the whole quadword, so the first 8
   bytes of memory arrive in d[1]; little-endian leaves them in d[0].  */
#ifdef __AARCH64EB__
# define DLANE_FIRST	1
# define DLANE_SECOND	0
#else
# define DLANE_FIRST	0
# define DLANE_SECOND	1
#endif

/* size_t __strlen_asimd (const char *s)

   In:	     x0 (srcin) = pointer to a NUL-terminated string.
   Out:	     x0 (len)	= number of bytes preceding the first NUL.
   Clobbers: x1-x7, v2, v3 and the condition flags (all caller-saved).
   Stack:    none (leaf function).

   Three paths:
     - fast path: one unaligned 16-byte load when it cannot cross a
       MIN_PAGE_SIZE boundary and the NUL is within those 16 bytes;
     - main loop: 16-byte aligned loads, 16 bytes per iteration;
     - page_cross: aligned load of the block containing srcin with the
       bytes in front of the string masked out, then the main loop.  */
ENTRY_ALIGN (__strlen_asimd, 6)
	/* DELOUSE comes from <sysdep.h>.  Only the pointer argument in x0
	   needs it: x1 is not an argument and is always written before it
	   is read below.  */
	DELOUSE (0)

	/* tmp1 = offset of srcin within its page.  It is an unsigned
	   quantity, hence the unsigned condition.  Offsets above
	   MIN_PAGE_SIZE - 16 would make the 16-byte load cross the page.  */
	and	tmp1, srcin, MIN_PAGE_SIZE - 1
	cmp	tmp1, MIN_PAGE_SIZE - 16
	b.hi	L(page_cross)
	ldr	dataq, [srcin]
#ifdef __AARCH64EB__
	/* Put the lowest-addressed byte of each 8-byte half at its LSB.  */
	rev64	datav.16b, datav.16b
#endif

	/* Get the minimum byte value and keep going if it is not zero,
	   i.e. if none of the 16 bytes is NUL.  */
	uminv	datab2, datav.16b
	mov	tmp1, datav2.d[0]
	cbnz	tmp1, L(main_loop_entry)

	/* NUL bytes become 0xff, everything else 0x00.  data1 is the mask
	   for string bytes 0-7 and data2 the mask for bytes 8-15.  */
	cmeq	datav.16b, datav.16b, #0
	mov	data1, datav.d[DLANE_FIRST]
	mov	data2, datav.d[DLANE_SECOND]
	/* Select the first half that contains a NUL.  The flags set here
	   are still live at the second csel below.  */
	cmp	data1, 0
	csel	data1, data1, data2, ne
	mov	len, 8
	/* The earliest byte sits at the LSB; rev moves it to the MSB so
	   that clz yields 8 * (index of the first NUL in this half).  */
	rev	data1, data1
	clz	tmp1, data1
	/* Base is 0 if the NUL was in the first half, 8 otherwise.  */
	csel	len, xzr, len, ne
	add	len, len, tmp1, lsr 3
	ret

L(main_loop_entry):
	/* Round down to 16 bytes.  The first loop load is at src + 16,
	   which is at most srcin + 16, so no byte is skipped.  */
	bic	src, srcin, 15

L(main_loop):
	/* Pre-indexed: src advances by 16, then 16 aligned bytes load.  */
	ldr	dataq, [src, 16]!
L(page_cross_entry):
	/* Get the minimum byte value and keep going if it is not zero.  */
	uminv	datab2, datav.16b
	mov	tmp1, datav2.d[0]
	cbnz	tmp1, L(main_loop)

L(tail):
#ifdef __AARCH64EB__
	rev64	datav.16b, datav.16b
#endif
	/* Set the NUL bytes to 0xff and the rest to 0x00, move the data into
	   a pair of scalars and then compute the length from the earliest
	   NUL byte.  */
	cmeq	datav.16b, datav.16b, #0
	mov	data1, datav.d[DLANE_FIRST]
	mov	data2, datav.d[DLANE_SECOND]
	cmp	data1, 0
	csel	data1, data1, data2, ne
	/* len = offset of this 16-byte block from the string start.  It is
	   negative when we arrive from page_cross, where src <= srcin.  */
	sub	len, src, srcin
	rev	data1, data1
	add	tmp2, len, 8
	clz	tmp1, data1
	/* Flags are still those of the cmp: pick the base of the half that
	   holds the first NUL, then add the byte index within it.  */
	csel	len, len, tmp2, ne
	add	len, len, tmp1, lsr 3
	ret

	/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
	   srcin to a non-zero value, so we ignore any NUL bytes before the
	   string.  Then continue in the aligned loop.

	   tmp1 becomes the keep-mask for the first 8 bytes of the block and
	   tmp2 the keep-mask for the second 8 bytes; bits that are clear in
	   a mask are set in the data by the final orn.  */
L(page_cross):
	mov	tmp3, 63
	bic	src, srcin, 15
	and	tmp1, srcin, 7
	ands	tmp2, srcin, 8		/* Z set: srcin is in the first half.  */
	ldr	dataq, [src]
	lsl	tmp1, tmp1, 3		/* Byte offset in the half -> bits.  */
	/* srcin in first half:  shift first mask by tmp1, second by 0.
	   srcin in second half: shift first mask by 63, second by tmp1.
	   A shift by 64 cannot be encoded; 63 leaves a single mask bit,
	   so the last byte of the first half is OR-ed with 0x7f (or 0xfe
	   on big-endian) and is therefore still non-zero.  */
	csel	tmp2, tmp2, tmp1, eq
	csel	tmp1, tmp1, tmp3, eq
	mov	tmp4, -1
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsr	tmp1, tmp4, tmp1
	lsr	tmp2, tmp4, tmp2
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsl	tmp1, tmp4, tmp1
	lsl	tmp2, tmp4, tmp2
#endif
	/* Place each mask in the lane that holds the matching 8 bytes of
	   the (not yet rev64-ed) load.  */
	mov	datav2.d[DLANE_FIRST], tmp1
	mov	datav2.d[DLANE_SECOND], tmp2
	orn	datav.16b, datav.16b, datav2.16b
	b	L(page_cross_entry)
END (__strlen_asimd)
weak_alias (__strlen_asimd, strlen_asimd)
libc_hidden_builtin_def (strlen_asimd)
169

Warning: That file was not part of the compilation database. It may have many parsing errors.