/* Copyright (C) 2012-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library. If not, see
   <https://www.gnu.org/licenses/>. */

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include <sysdep.h>

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Parameters and result. */
#define src1     x0
#define src2     x1
#define result   x0

/* Internal variables. */
#define data1    x2
#define data1w   w2
#define data2    x3
#define data2w   w3
#define has_nul  x4
#define diff     x5
#define off1     x5
#define syndrome x6
#define tmp      x6
#define data3    x7
#define zeroones x8
#define shift    x9
#define off2     x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes. */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check. */
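
/* Worked example (added for illustration): for the little-endian word
   X = 0x4141414100414141, which has a NUL in byte 3,
   X - REP8_01    = 0x4040403fff404040 and
   ~(X | REP8_7f) = 0x8080808080808080;
   their AND is 0x0000000080000000, i.e. 0x80 exactly at the NUL byte,
   whereas it is zero when no byte of X is zero. */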

ENTRY(strcmp)
        PTR_ARG (0)
        PTR_ARG (1)
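        /* off2 = src2 - src1, so only src1 is advanced and src2's data is
           reached via [src1, off2]. Strings whose pointers differ in their
           low 3 bits take the misaligned path; mutually aligned but
           unaligned pointers are fixed up in L(mutual_align). */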
        sub off2, src2, src1
        mov zeroones, REP8_01
        and tmp, src1, 7
        tst off2, 7
        b.ne L(misaligned8)
        cbnz tmp, L(mutual_align)

        .p2align 4

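        /* Main aligned loop: compare one 8-byte word from each string per
           iteration and leave the loop as soon as the words differ or the
           word from src1 contains a NUL byte. */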
L(loop_aligned):
        ldr data2, [src1, off2]
        ldr data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
        rev tmp, data1
        sub has_nul, tmp, zeroones
        orr tmp, tmp, REP8_7f
#else
        sub has_nul, data1, zeroones
        orr tmp, data1, REP8_7f
#endif
        bics has_nul, has_nul, tmp /* Non-zero if NUL terminator. */
        ccmp data1, data2, 0, eq
        b.eq L(loop_aligned)
#ifdef __AARCH64EB__
        rev has_nul, has_nul
#endif
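        /* Either the words differ or data1 contains a NUL: combine the byte
           differences and the NUL marker into a syndrome whose first
           non-zero byte (in string order) decides the comparison. */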
        eor diff, data1, data2
        orr syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
        rev syndrome, syndrome
        rev data1, data1
        rev data2, data2
#endif
        clz shift, syndrome
        /* The most-significant-non-zero bit of the syndrome marks either the
           first bit that is different, or the top bit of the first zero byte.
           Shifting left now will bring the critical information into the
           top bits. */
        lsl data1, data1, shift
        lsl data2, data2, shift
        /* But we need to zero-extend (char is unsigned) the value and then
           perform a signed 32-bit subtraction. */
        lsr data1, data1, 56
        sub result, data1, data2, lsr 56
        ret

        .p2align 4

L(mutual_align):
        /* Sources are mutually aligned, but are not currently at an
           alignment boundary. Round down the addresses and then mask off
           the bytes that precede the start point. */
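        /* ORing all-ones into those leading bytes makes them identical and
           non-zero in both data words, so they can neither produce a
           spurious difference nor look like a NUL terminator. */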
        bic src1, src1, 7
        ldr data2, [src1, off2]
        ldr data1, [src1], 8
        neg shift, src2, lsl 3 /* Bits to alignment -64. */
        mov tmp, -1
        LS_FW tmp, tmp, shift
        orr data1, data1, tmp
        orr data2, data2, tmp
        b L(start_realigned)

L(misaligned8):
        /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
           checking to make sure that we don't access beyond the end of SRC2. */
        cbz tmp, L(src1_aligned)
L(do_misaligned):
        ldrb data1w, [src1], 1
        ldrb data2w, [src2], 1
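        /* The ccmp forces a "not equal" result when data1w is zero, so the
           b.ne below exits on a NUL byte in src1 or on a mismatch. */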
        cmp data1w, 0
        ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
        b.ne L(done)
        tst src1, 7
        b.ne L(do_misaligned)

L(src1_aligned):
        neg shift, src2, lsl 3
        bic src2, src2, 7
        ldr data3, [src2], 8
#ifdef __AARCH64EB__
        rev data3, data3
#endif
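        /* Force the bytes that precede the real start of src2 to be non-zero
           (OR in 0x01) so the NUL check below cannot fire on them. */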
        lsr tmp, zeroones, shift
        orr data3, data3, tmp
        sub has_nul, data3, zeroones
        orr tmp, data3, REP8_7f
        bics has_nul, has_nul, tmp
        b.ne L(tail)

        sub off1, src2, src1
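        /* off1 addresses the aligned src2 words. The unaligned load via off2
           only spans aligned words that are also loaded as data3 and checked
           for a NUL, so src2 is never read past the aligned word that holds
           its terminator. */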

        .p2align 4

L(loop_unaligned):
        ldr data3, [src1, off1]
        ldr data2, [src1, off2]
#ifdef __AARCH64EB__
        rev data3, data3
#endif
        sub has_nul, data3, zeroones
        orr tmp, data3, REP8_7f
        ldr data1, [src1], 8
        bics has_nul, has_nul, tmp
        ccmp data1, data2, 0, eq
        b.eq L(loop_unaligned)

        lsl tmp, has_nul, shift
#ifdef __AARCH64EB__
        rev tmp, tmp
#endif
        eor diff, data1, data2
        orr syndrome, diff, tmp
        cbnz syndrome, L(end)
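        /* A NUL lies in the aligned word data3, but no difference was found
           in the bytes compared so far: shift data3 down to the true string
           position and do one final word comparison. */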
L(tail):
        ldr data1, [src1]
        neg shift, shift
        lsr data2, data3, shift
        lsr has_nul, has_nul, shift
#ifdef __AARCH64EB__
        rev data2, data2
        rev has_nul, has_nul
#endif
        eor diff, data1, data2
        orr syndrome, diff, has_nul
        b L(end)

L(done):
        sub result, data1, data2
        ret

END(strcmp)
libc_hidden_builtin_def (strcmp)

Source: glibc/sysdeps/aarch64/strcmp.S