/* Copyright (C) 2006-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
18 | |
/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above).
 *
 * PLD(insn) emits INSN as-is on cores that implement the "pld" cache
 * preload hint; on older architectures (listed below) it expands to
 * nothing, so the preload instructions simply disappear.
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...) code
#else
#define PLD(code...)
#endif

/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on tested architectures (StrongARM and XScale) didn't show
 * this a worthwhile thing to do.  That might be different in the future.
 *
 * CALGN(insn) is the guarded cache-alignment code: currently compiled
 * out (empty expansion); uncomment the first definition to enable it.
 */
//#define CALGN(code...) code
#define CALGN(code...)

/*
 * Endian independent macros for shifting bytes within registers.
 *
 * When copying through a misaligned source, each output word is built
 * from two adjacent source words: PULL shifts out the bytes already
 * consumed from the previous word and PUSH positions the bytes of the
 * next word so the two halves can be OR-ed together.  The shift
 * directions swap between little- and big-endian so the byte order in
 * memory is preserved either way.
 */
#ifndef __ARMEB__
#define PULL lsr
#define PUSH lsl
#else
#define PULL lsl
#define PUSH lsr
#endif

		.text
		.syntax unified

/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
59 | |
/* void *memcpy (void *dest, const void *src, size_t n)

   In:  r0 = dest, r1 = src, r2 = n.
   Out: r0 = original dest (the entry value is pushed and re-popped
	into r0 on exit).

   Structure:
     - fewer than 4 bytes total: tail copy at 8:
     - dest misaligned: copy 1-3 bytes at 9: to word-align it
     - src and dest mutually word-aligned: 32-byte LDM/STM blocks at
       1:/3:/4:, remainder via a computed jump into the ladders at 6:/66:
     - src offset from dest by 1-3 bytes: 10: dispatches to one of the
       three forward_copy_shift expansions, which merge adjacent source
       words with PULL/PUSH shifts.  */

ENTRY(memcpy)

		push	{r0, r4, lr}		@ r0 saved only for the return value
		cfi_adjust_cfa_offset (12)
		cfi_rel_offset (r4, 4)
		cfi_rel_offset (lr, 8)

		cfi_remember_state

		subs	r2, r2, #4		@ r2 = n - 4; borrow -> n < 4
		blo	8f			@ only a 0-3 byte tail to copy
		ands	ip, r0, #3
	PLD(	pld	[r1, #0]		)
		bne	9f			@ dest not word-aligned: fix up first
		ands	ip, r1, #3
		bne	10f			@ src misaligned wrt dest: shifted copy

		/* Both pointers word-aligned; r2 = n - 4.  */
1:		subs	r2, r2, #(28)		@ is a full 32-byte block available?
		push	{r5 - r8}
		cfi_adjust_cfa_offset (16)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		blo	5f

		/* Optional (compiled-out) source cache-line alignment:
		   copy 1-7 leading words via the ladder at 6: so the main
		   loop's LDMs start on a 32-byte boundary.  */
	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	r3, ip, #32		)
	CALGN(	sbcsne	r4, r3, r2		)  @ C is always set here
	CALGN(	bcs	2f			)
	CALGN(	adr	r4, 6f			)
	CALGN(	subs	r2, r2, r3		)  @ C gets set
#ifndef ARM_ALWAYS_BX
	CALGN(	add	pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
	CALGN(	add	r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
	CALGN(	bx	r4			)
#endif

	PLD(	pld	[r1, #0]		)
2:	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #28]		)
	PLD(	blo	4f			)
	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)

		/* Main aligned loop: 32 bytes per iteration through 8
		   registers; loop while r2 (minus the 32 borrowed above)
		   stays non-negative.  */
3:	PLD(	pld	[r1, #124]		)
4:		ldmia	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
		subs	r2, r2, #32
		stmia	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
		bhs	3b

		/* 0-28 bytes of whole words remain (bits 2-4 of r2).  Jump
		   into the load ladder below so that exactly (r2 & 28) / 4
		   of its seven LDR slots execute, then do the same with the
		   store ladder.  Each slot is padded to 1 << ARM_BX_ALIGN_LOG2
		   bytes so "ip * slot size" indexes it.  */
5:		ands	ip, r2, #28
		rsb	ip, ip, #32
#ifndef ARM_ALWAYS_BX
		/* C is always clear here.  */
		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		b	7f
#else
		beq	7f
		push	{r10}
		cfi_adjust_cfa_offset (4)
		cfi_rel_offset (r10, 0)
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 6.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(6f - (0b + PC_OFS))
		bx	r10
#endif
		.p2align ARM_BX_ALIGN_LOG2
6:		nop				@ ip == 32 lands here: zero words
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r3, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r4, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r5, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r6, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r7, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r8, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	lr, [r1], #4

		/* Same computed jump, now into the store ladder at 66:.  */
#ifndef ARM_ALWAYS_BX
		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		nop
#else
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 66.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(66f - (0b + PC_OFS))
		bx	r10
#endif
		.p2align ARM_BX_ALIGN_LOG2
66:		nop
		.p2align ARM_BX_ALIGN_LOG2
		str	r3, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r4, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r5, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r6, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r7, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r8, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	lr, [r0], #4

#ifdef ARM_ALWAYS_BX
		pop	{r10}
		cfi_adjust_cfa_offset (-4)
		cfi_restore (r10)
#endif

	CALGN(	bcs	2b			)

7:		pop	{r5 - r8}
		cfi_adjust_cfa_offset (-16)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)

		/* Final 0-3 bytes.  The shift moves bit 0 of the count into
		   N/Z (NE <=> 1 odd byte) and bit 1 into C (CS <=> 2 bytes),
		   so the conditional byte copies below transfer n & 3.  */
8:		movs	r2, r2, lsl #31
		ldrbne	r3, [r1], #1
		ldrbcs	r4, [r1], #1
		ldrbcs	ip, [r1]
		strbne	r3, [r0], #1
		strbcs	r4, [r0], #1
		strbcs	ip, [r0]

#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
		/* Cannot pop directly into pc when BX is required for
		   interworking; return via lr instead.  */
		pop	{r0, r4, lr}
		cfi_adjust_cfa_offset (-12)
		cfi_restore (r4)
		cfi_restore (lr)
		bx	lr
#else
		pop	{r0, r4, pc}
#endif

		cfi_restore_state

		/* Dest misaligned: ip = dest & 3 (1..3).  Copy 4 - ip bytes
		   (GT: 3 needed, GE: at least 2, last byte unconditional) so
		   dest becomes word-aligned, then re-test the source.  */
9:		rsb	ip, ip, #4
		cmp	ip, #2
		ldrbgt	r3, [r1], #1
		ldrbge	r4, [r1], #1
		ldrb	lr, [r1], #1
		strbgt	r3, [r0], #1
		strbge	r4, [r0], #1
		subs	r2, r2, ip
		strb	lr, [r0], #1
		blo	8b			@ fewer than 4 bytes remain
		ands	ip, r1, #3
		beq	1b			@ src aligned too: block copy

		/* Dest word-aligned, src offset by ip (1..3).  Round src
		   down to a word boundary, preload that word into lr, and
		   dispatch on the offset: fall through for 1 (pull=8),
		   17: for 2 (pull=16), 18: for 3 (pull=24).  */
10:		bic	r1, r1, #3
		cmp	ip, #2
		ldr	lr, [r1], #4
		beq	17f
		bgt	18f


/* Word-copy loop for a source that is \pull/8 bytes past a word
   boundary.  On entry lr holds the rounded-down first source word and
   r2 = bytes remaining - 4.  Each output word is the top of one source
   word (PULL #\pull) OR-ed with the bottom of the next (PUSH #\push);
   \pull + \push == 32.  Ends by rewinding r1 and joining the byte tail
   at 8:.  */
	.macro forward_copy_shift pull push

		subs	r2, r2, #28
		blo	14f			@ no full 32-byte block

		/* Optional (compiled-out) cache alignment, as at 1:.  */
	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	ip, ip, #32		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	subcc	r2, r2, ip		)
	CALGN(	bcc	15f			)

11:		push	{r5 - r8, r10}
		cfi_adjust_cfa_offset (20)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		cfi_rel_offset (r10, 16)

	PLD(	pld	[r1, #0]		)
	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #28]		)
	PLD(	blo	13f			)
	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)

		/* Main loop: load 8 source words, shift/merge them into 8
		   realigned output words, store 32 bytes per iteration.  */
12:	PLD(	pld	[r1, #124]		)
13:		ldmia	r1!, {r4, r5, r6, r7}
		mov	r3, lr, PULL #\pull
		subs	r2, r2, #32
		ldmia	r1!, {r8, r10, ip, lr}
		orr	r3, r3, r4, PUSH #\push
		mov	r4, r4, PULL #\pull
		orr	r4, r4, r5, PUSH #\push
		mov	r5, r5, PULL #\pull
		orr	r5, r5, r6, PUSH #\push
		mov	r6, r6, PULL #\pull
		orr	r6, r6, r7, PUSH #\push
		mov	r7, r7, PULL #\pull
		orr	r7, r7, r8, PUSH #\push
		mov	r8, r8, PULL #\pull
		orr	r8, r8, r10, PUSH #\push
		mov	r10, r10, PULL #\pull
		orr	r10, r10, ip, PUSH #\push
		mov	ip, ip, PULL #\pull
		orr	ip, ip, lr, PUSH #\push
		stmia	r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
		bhs	12b

		pop	{r5 - r8, r10}
		cfi_adjust_cfa_offset (-20)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)
		cfi_restore (r10)

		/* 0-28 bytes of whole words left (bits 2-4 of r2).  */
14:		ands	ip, r2, #28
		beq	16f

		/* Copy the remaining words one at a time, still merging
		   adjacent source words.  */
15:		mov	r3, lr, PULL #\pull
		ldr	lr, [r1], #4
		subs	ip, ip, #4
		orr	r3, r3, lr, PUSH #\push
		str	r3, [r0], #4
		bgt	15b
	CALGN(	cmp	r2, #0			)
	CALGN(	bge	11b			)

		/* Rewind src to the first unconsumed byte (r1 ran \push/8
		   bytes ahead) and finish with the byte tail at 8:.  */
16:		sub	r1, r1, #(\push / 8)
		b	8b

	.endm


		forward_copy_shift	pull=8	push=24

17:		forward_copy_shift	pull=16	push=16

18:		forward_copy_shift	pull=24	push=8

END(memcpy)
libc_hidden_builtin_def (memcpy)
316 | |