/* strcpy/stpcpy - copy a string returning pointer to start/end.
   Copyright (C) 2013-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* To build as stpcpy, define BUILD_STPCPY before compiling this file.

   To test the page crossing code path more thoroughly, compile with
   -DSTRCPY_TEST_PAGE_CROSS - this will force all unaligned copies through
   the slower entry path.  This option is not intended for production use.

   NOTE(review): nothing in this file tests STRCPY_TEST_PAGE_CROSS, so the
   paragraph above may be stale - confirm before relying on it.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Arguments and results.
   dstin and result are both x0: a strcpy build never writes x0, so the
   incoming destination pointer is returned as is; a stpcpy build overwrites
   it just before returning.  */
#define dstin		x0
#define srcin		x1
#define result		x0

/* Scratch general-purpose registers.  Several names share one register and
   must therefore never be live at the same time:
     len  / synd          -> x4
     tmp  / wtmp / shift  -> x5 (w5)
   data1/dataw1 and data2/dataw2 are the 64-bit and 32-bit views of x6, x7.  */
#define src		x2
#define dst		x3
#define len		x4
#define synd		x4
#define tmp		x5
#define wtmp		w5
#define shift		x5
#define data1		x6
#define dataw1		w6
#define data2		x7
#define dataw2		w7

/* SIMD registers.  dataq/vdata are two views of v0 and vend/dend two views
   of v3.  dataq2 (q1) shares v1 with vhas_nul, so loading dataq2 destroys
   the current NUL-compare result.  */
#define dataq		q0
#define vdata		v0
#define vhas_nul	v1
#define vrepmask	v2
#define vend		v3
#define dend		d3
#define dataq2		q1

/* Pick the entry-point name and define IFSTPCPY, which wraps instructions
   that are only needed by stpcpy.

   IFSTPCPY (add result, dstin, len) is split by the preprocessor at the
   first comma: X receives "add result" and the remaining operands arrive
   through __VA_ARGS__, so the stpcpy build re-emits the full instruction.
   The strcpy build expands the macro to nothing.  */
#ifdef BUILD_STPCPY
# define STRCPY __stpcpy
# define IFSTPCPY(X,...) X,__VA_ARGS__
#else
# define STRCPY strcpy
# define IFSTPCPY(X,...)
#endif

/* Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte is NUL and
   bits 4-7 are zero.  For odd bytes, bits 4-7 are set likewise and bits 0-3
   are zero, so that adjacent bytes can be merged.  Since the bits in the
   syndrome reflect the order in which things occur in the original string,
   counting trailing zeros identifies exactly which byte matched.  */

/* STRCPY (dstin, srcin): copy the NUL-terminated string at srcin, including
   its terminator, to dstin.

   In:   x0 = dstin, x1 = srcin.
   Out:  strcpy build: x0 is never written, so dstin is returned.
	 stpcpy build: x0 = address of the terminator written to the
	 destination.
   Uses: x2-x7, v0-v3 and the condition flags; no stack.

   The search for the terminator only loads from 16-byte aligned addresses.
   Once the offset of the NUL (len) is known, strings with len < 32 are
   copied with two possibly overlapping accesses of 16, 8 or 4 bytes (or a
   halfword plus a byte); longer strings go through L(loop), which copies
   one aligned 16-byte source chunk per iteration and finishes with one
   overlapping 16-byte copy that ends on the terminator.  */
ENTRY (STRCPY)
	/* PTR_ARG is provided by <sysdep.h>.  NOTE(review): presumably it
	   normalises pointer argument N on ILP32 builds - confirm there.  */
	PTR_ARG (0)
	PTR_ARG (1)

	/* First chunk: the aligned 16 bytes that contain srcin.  */
	bic	src, srcin, 15
	mov	wtmp, 0xf00f
	ld1	{vdata.16b}, [src]
	/* Each halfword of vrepmask is 0xf00f.  */
	dup	vrepmask.8h, wtmp
	/* 0xff in every byte lane that holds a NUL.  */
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* shift shares x5 with wtmp, which the dup above already consumed.
	   A register-specified shift uses the amount modulo 64, so this is
	   effectively 4 * (srcin & 15): four syndrome bits per byte that
	   lies before srcin.  */
	lsl	shift, srcin, 2
	/* Keep one nibble per byte, then add adjacent bytes to fold the
	   128-bit mask into the 64-bit syndrome.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	/* Drop the nibbles that belong to bytes before srcin.  */
	lsr	synd, synd, shift
	cbnz	synd, L(tail)

	/* Second chunk: advance src by 16 (pre-index writeback) and build
	   the same syndrome for the next aligned 16 bytes.  */
	ldr	dataq, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(start_loop)

	/* NUL found in the second chunk.  NOTE(review): on big-endian the
	   ldr q load presumably already places the first byte's nibble in
	   the top bits, which would be why rbit is skipped - confirm.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* len = (src - srcin) + index of the NUL within this chunk, i.e. the
	   offset of the NUL from srcin (1..31).  */
	sub	tmp, src, srcin
	clz	len, synd
	add	len, tmp, len, lsr 2
	/* Bit 4 clear means len < 16.  */
	tbz	len, 4, L(less16)
	/* 16 <= len <= 31: copy len + 1 bytes as two overlapping 16-byte
	   blocks, the second one ending exactly on the NUL.  Both loads are
	   issued before either store.  */
	sub	tmp, len, 15
	ldr	dataq, [srcin]
	ldr	dataq2, [srcin, tmp]
	str	dataq, [dstin]
	str	dataq2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

	/* Align to 16 bytes only if that costs at most 8 bytes of padding.  */
	.p2align 4,,8
L(tail):
	/* NUL found in the first chunk: len = trailing zero bits / 4, the
	   offset of the NUL from srcin (0..15).  This rbit is unconditional,
	   unlike the #ifndef-guarded ones on the ldr q paths.
	   NOTE(review): presumably because ld1 {..16b} fills lanes in memory
	   order on either endianness - confirm.  */
	rbit	synd, synd
	clz	len, synd
	lsr	len, len, 2

	.p2align 4
L(less16):
	/* Here len < 16.  Bit 3 clear means len < 8.  */
	tbz	len, 3, L(less8)
	/* 8 <= len <= 15: two overlapping 8-byte copies, the second one
	   ending on the NUL.  */
	sub	tmp, len, 7
	ldr	data1, [srcin]
	ldr	data2, [srcin, tmp]
	str	data1, [dstin]
	str	data2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(less8):
	/* Here len < 8.  tmp = len - 3; an unsigned borrow means len < 3.  */
	subs	tmp, len, 3
	b.lo	L(less4)
	/* 3 <= len <= 7: two overlapping 4-byte copies, the second one
	   ending on the NUL.  */
	ldr	dataw1, [srcin]
	ldr	dataw2, [srcin, tmp]
	str	dataw1, [dstin]
	str	dataw2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

L(less4):
	/* Here len is 0, 1 or 2.  For 1 or 2, copy the first two bytes.  */
	cbz	len, L(zerobyte)
	ldrh	dataw1, [srcin]
	strh	dataw1, [dstin]
L(zerobyte):
	/* Write the terminator at dstin + len.  */
	strb	wzr, [dstin, len]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(start_loop):
	/* No NUL from srcin up to src + 15.  Copy the first 16 bytes from the
	   unaligned srcin, and set dst to the destination address that
	   corresponds to the aligned src.  dataq still holds the NUL-free
	   chunk at src.  Loading dataq2 overwrites vhas_nul (same register),
	   which the loop recomputes.  */
	sub	len, src, srcin
	ldr	dataq2, [srcin]
	add	dst, dstin, len
	str	dataq2, [dstin]

	.p2align 5
L(loop):
	/* Store the previous NUL-free chunk (dst += 16 afterwards), load the
	   next aligned chunk (src += 16 first) and test it for NUL.  umaxp is
	   enough here because only zero versus non-zero matters.  */
	str	dataq, [dst], 16
	ldr	dataq, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)

	/* The chunk at src holds the NUL and has not been stored yet.  Build
	   the exact syndrome to locate the NUL within it.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	fmov	synd, dend
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* len = index of the NUL in this chunk (0..15); tmp = len - 15 <= 0.
	   Copy the 16 bytes that end on the NUL.  They may overlap bytes
	   already copied, and all of them lie at or after srcin because src
	   has advanced past srcin by more than 16 bytes at this point.  */
	clz	len, synd
	lsr	len, len, 2
	sub	tmp, len, 15
	ldr	dataq, [src, tmp]
	str	dataq, [dst, tmp]
	IFSTPCPY (add result, dst, len)
	ret

END (STRCPY)

/* Symbol aliases and hidden definitions for the entry point built above.
   weak_alias, libc_hidden_def and libc_hidden_builtin_def are macros defined
   outside this file.  NOTE(review): presumably they publish stpcpy as a weak
   alias of __stpcpy and create the library-internal hidden symbols -
   confirm against their definitions.  */
#ifdef BUILD_STPCPY
weak_alias (__stpcpy, stpcpy)
libc_hidden_def (__stpcpy)
libc_hidden_builtin_def (stpcpy)
#else
libc_hidden_builtin_def (strcpy)
#endif


/* Source: glibc/sysdeps/aarch64/strcpy.S.  */