1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * linux/arch/arm/lib/memmove.S |
4 | * |
5 | * Author: Nicolas Pitre |
6 | * Created: Sep 28, 2005 |
7 | * Copyright: (C) MontaVista Software Inc. |
8 | */ |
9 | |
10 | #include <linux/linkage.h> |
11 | #include <asm/assembler.h> |
12 | #include <asm/unwind.h> |
13 | |
14 | .text |
15 | |
16 | /* |
17 | * Prototype: void *memmove(void *dest, const void *src, size_t n); |
18 | * |
19 | * Note: |
20 | * |
21 | * If dest <= src, or dest starts at least n bytes above src, we simply branch |
22 | * to __memcpy which is normally a bit faster. Otherwise the copy is done going |
23 | * downwards. This is a transposition of the code from copy_template.S but with |
24 | * the copy occurring in the opposite direction. |
25 | */ |
26 | |
/*
 * __memmove, with memmove as a weak alias.
 *
 * Register roles used throughout (assuming the prototype's arguments arrive
 * in r0-r2 in order):
 *   r0 = dest, r1 = src, r2 = n on entry. After the entry checks r0 and r1
 *        are moved to the END of each buffer and walk downwards.
 *   r2 = remaining byte count, kept biased (by -4, then a further -28, and
 *        by an extra -96 inside the PLD-enabled loops).
 *   ip = scratch (pointer distance, misalignment, computed-jump offset).
 * The original r0 is pushed at entry and popped at exit, so the routine
 * returns with r0 equal to the dest value it was called with.
 *
 * PLD(), CALGN(), W(), UNWIND(), lspush, lspull and fpreg are not defined in
 * this file (presumably they come from the included headers); comments
 * below describe only what the wrapped instructions do when emitted.
 */
27 | ENTRY(__memmove) |
28 | WEAK(memmove) |
29 | UNWIND( .fnstart ) |
30 | |
/*
 * ip = dest - src. When dest > src (HI) additionally compare n against that
 * distance. LS then holds if dest <= src or n <= dest - src, and those cases
 * are handed to __memcpy. Execution falls through only when dest lies
 * strictly inside (src, src + n).
 */
31 | subs ip, r0, r1 |
32 | cmphi r2, ip |
33 | bls __memcpy |
34 | UNWIND( .fnend ) |
35 | |
36 | UNWIND( .fnstart ) |
37 | UNWIND( .save {r0, r4, fpreg, lr} ) |
/* Save the original dest (r0), r4 and lr; these are popped at label 8. */
38 | stmfd sp!, {r0, r4, UNWIND(fpreg,) lr} |
39 | UNWIND( .setfp fpreg, sp ) |
40 | UNWIND( mov fpreg, sp ) |
/* Point r1 and r0 one past the last byte of src and dest respectively. */
41 | add r1, r1, r2 |
42 | add r0, r0, r2 |
/* r2 = n - 4; fewer than 4 bytes in total goes straight to the byte tail. */
43 | subs r2, r2, #4 |
44 | blt 8f |
/* ip = low two bits of the dest end; non-zero means align dest first (9). */
45 | ands ip, r0, #3 |
46 | PLD( pld [r1, #-4] ) |
47 | bne 9f |
/* ip = low two bits of the src end; non-zero selects the shifted copy (10). */
48 | ands ip, r1, #3 |
49 | bne 10f |
50 | |
/*
 * 1: both pointers are word aligned. Bias r2 by a further 28 so that a
 * non-negative value means at least 32 bytes remain, save the extra work
 * registers, and skip the bulk loop (to 5) when fewer than 32 bytes remain.
 */
51 | 1: subs r2, r2, #(28) |
52 | stmfd sp!, {r5, r6, r8, r9} |
53 | blt 5f |
54 | |
/*
 * CALGN sequence: ip = dest & 31. If ip is zero, or ip >= r2, go to the
 * bulk loop at 2. Otherwise subtract ip from r2 and jump to (label 6 +
 * 32 - ip), i.e. into the load ladder so that exactly ip bytes are copied
 * word by word before the bulk loop is entered through the bcs after the
 * store ladder.
 */
55 | CALGN( ands ip, r0, #31 ) |
56 | CALGN( sbcsne r4, ip, r2 ) @ C is always set here |
57 | CALGN( bcs 2f ) |
58 | CALGN( adr r4, 6f ) |
59 | CALGN( subs r2, r2, ip ) @ C is set here |
60 | CALGN( rsb ip, ip, #32 ) |
61 | CALGN( add pc, r4, ip ) |
62 | |
/*
 * PLD set-up: bias r2 by an extra 96 and issue the initial prefetches. If
 * the biased count is already negative, enter the loop at 4 so that no
 * prefetch at -128 is issued.
 */
63 | PLD( pld [r1, #-4] ) |
64 | 2: PLD( subs r2, r2, #96 ) |
65 | PLD( pld [r1, #-32] ) |
66 | PLD( blt 4f ) |
67 | PLD( pld [r1, #-64] ) |
68 | PLD( pld [r1, #-96] ) |
69 | |
/*
 * Bulk loop: 32 bytes per iteration, eight registers loaded from below r1
 * and stored below r0, both with writeback. The flags tested by bge come
 * from the subs (stmdb does not alter them). After the loop the cmn/bge
 * pair removes the PLD bias test: it keeps iterating from 4 (no prefetch)
 * while r2 >= -96.
 */
70 | 3: PLD( pld [r1, #-128] ) |
71 | 4: ldmdb r1!, {r3, r4, r5, r6, r8, r9, ip, lr} |
72 | subs r2, r2, #32 |
73 | stmdb r0!, {r3, r4, r5, r6, r8, r9, ip, lr} |
74 | bge 3b |
75 | PLD( cmn r2, #96 ) |
76 | PLD( bge 4b ) |
77 | |
/*
 * 5: ip = r2 & 28 is the number of bytes still to copy as whole words
 * (0..28). If it is zero, branch to 7. Otherwise ip becomes 32 - that count
 * and is added to pc to jump part-way into the ladder at 6, skipping the
 * leading slots that are not needed (rsb leaves the flags from ands intact).
 * NOTE: the target depends on the exact instruction layout between addne
 * and the ladder (in ARM state pc reads as the address of label 6 here), so
 * no instruction may be inserted, removed or resized in L80-L100.
 */
78 | 5: ands ip, r2, #28 |
79 | rsb ip, ip, #32 |
80 | addne pc, pc, ip @ C is always clear here |
81 | b 7f |
/* 6: load ladder - one padding slot followed by seven word loads. */
82 | 6: W(nop) |
83 | W(ldr) r3, [r1, #-4]! |
84 | W(ldr) r4, [r1, #-4]! |
85 | W(ldr) r5, [r1, #-4]! |
86 | W(ldr) r6, [r1, #-4]! |
87 | W(ldr) r8, [r1, #-4]! |
88 | W(ldr) r9, [r1, #-4]! |
89 | W(ldr) lr, [r1, #-4]! |
90 | |
/*
 * Same computed jump, with the same ip, into the matching store ladder so
 * that exactly the registers loaded above are written out. The plain nop
 * after the add is padding for the pc read-ahead; W(nop) is slot zero of
 * the ladder.
 */
91 | add pc, pc, ip |
92 | nop |
93 | W(nop) |
94 | W(str) r3, [r0, #-4]! |
95 | W(str) r4, [r0, #-4]! |
96 | W(str) r5, [r0, #-4]! |
97 | W(str) r6, [r0, #-4]! |
98 | W(str) r8, [r0, #-4]! |
99 | W(str) r9, [r0, #-4]! |
100 | W(str) lr, [r0, #-4]! |
101 | |
/*
 * With C set (the CALGN alignment entry, per the comment at L59) go back to
 * the bulk loop; with C clear (entry from 5, per the comment at L80) fall
 * through to the epilogue.
 */
102 | CALGN( bcs 2b ) |
103 | |
/* 7: restore the work registers pushed at label 1. */
104 | 7: ldmfd sp!, {r5, r6, r8, r9} |
105 | |
/*
 * 8: byte tail. Shifting r2 left by 31 moves bit 0 of r2 to the result's
 * top bit (so NE when bit 0 was set) and bit 1 of r2 into C. NE copies one
 * byte, CS copies two more; the last load/store needs no writeback. The
 * final ldmfd restores the original dest into r0 and loads pc to return.
 */
106 | 8: movs r2, r2, lsl #31 |
107 | ldrbne r3, [r1, #-1]! |
108 | ldrbcs r4, [r1, #-1]! |
109 | ldrbcs ip, [r1, #-1] |
110 | strbne r3, [r0, #-1]! |
111 | strbcs r4, [r0, #-1]! |
112 | strbcs ip, [r0, #-1] |
113 | ldmfd sp!, {r0, r4, UNWIND(fpreg,) pc} |
114 | |
/*
 * 9: dest end is not word aligned; ip = dest & 3 (1..3). Copy ip bytes
 * (GT: ip == 3, GE: ip >= 2, one byte unconditionally) so that r0 becomes
 * word aligned, and subtract ip from r2. If r2 went negative, finish in the
 * byte tail. Otherwise re-test the src alignment: aligned goes to 1,
 * misaligned falls through to 10 with ip = src & 3.
 */
115 | 9: cmp ip, #2 |
116 | ldrbgt r3, [r1, #-1]! |
117 | ldrbge r4, [r1, #-1]! |
118 | ldrb lr, [r1, #-1]! |
119 | strbgt r3, [r0, #-1]! |
120 | strbge r4, [r0, #-1]! |
121 | subs r2, r2, ip |
122 | strb lr, [r0, #-1]! |
123 | blt 8b |
124 | ands ip, r1, #3 |
125 | beq 1b |
126 | |
/*
 * 10: dest is word aligned, src is not (ip = src & 3). Round r1 down to a
 * word boundary, preload that word into r3, and pick the macro expansion
 * for the misalignment: ip == 2 -> 17, ip == 1 -> 18, ip == 3 falls through
 * into the first expansion below. The flags from cmp are still valid at the
 * branches because bic and ldr do not change them.
 */
127 | 10: bic r1, r1, #3 |
128 | cmp ip, #2 |
129 | ldr r3, [r1, #0] |
130 | beq 17f |
131 | blt 18f |
132 | |
133 | |
/*
 * backward_copy_shift push pull
 *
 * Downward copy for a word-aligned dest and a misaligned src. On entry r1
 * is the rounded-down src pointer and r3 holds the word at [r1]. Every
 * stored word is built from two adjacent source words:
 *     (higher word lspush #push) | (next lower word lspull #pull)
 * Numeric labels 11-16 are local and are redefined by each expansion; the
 * "b 8b" at the end targets the byte tail at label 8 of the function.
 * Uses r3-r6, r8-r10, ip and lr as data registers.
 */
134 | .macro backward_copy_shift push pull |
135 | |
/* Bias r2 by 28; fewer than 32 bytes left skips the bulk loop (to 14). */
136 | subs r2, r2, #28 |
137 | blt 14f |
138 | |
/*
 * CALGN sequence: ip = dest & 31. When ip is non-zero and smaller than r2
 * (CC after sbcsne), subtract ip from r2 and copy ip bytes word by word at
 * 15 first; that loop then returns to 11.
 */
139 | CALGN( ands ip, r0, #31 ) |
140 | CALGN( sbcsne r4, ip, r2 ) @ C is always set here |
141 | CALGN( subcc r2, r2, ip ) |
142 | CALGN( bcc 15f ) |
143 | |
/* 11: save the extra work registers used by the 32-byte loop. */
144 | 11: stmfd sp!, {r5, r6, r8 - r10} |
145 | |
146 | PLD( pld [r1, #-4] ) |
147 | PLD( subs r2, r2, #96 ) |
148 | PLD( pld [r1, #-32] ) |
149 | PLD( blt 13f ) |
150 | PLD( pld [r1, #-64] ) |
151 | PLD( pld [r1, #-96] ) |
152 | |
/*
 * 12/13: 32 bytes per iteration. Eight source words are loaded in two
 * groups of four below r1; each output register is the push-shifted higher
 * word merged with the pull-shifted next lower word, working from lr (the
 * highest address) down to r4. r3 ends up holding the lowest word loaded
 * and is the carry-over for the next iteration. The bge uses the flags
 * from the subs, since none of the mov/orr/ldmdb/stmdb instructions in
 * between set flags.
 */
153 | 12: PLD( pld [r1, #-128] ) |
154 | 13: ldmdb r1!, {r8, r9, r10, ip} |
155 | mov lr, r3, lspush #\push |
156 | subs r2, r2, #32 |
157 | ldmdb r1!, {r3, r4, r5, r6} |
158 | orr lr, lr, ip, lspull #\pull |
159 | mov ip, ip, lspush #\push |
160 | orr ip, ip, r10, lspull #\pull |
161 | mov r10, r10, lspush #\push |
162 | orr r10, r10, r9, lspull #\pull |
163 | mov r9, r9, lspush #\push |
164 | orr r9, r9, r8, lspull #\pull |
165 | mov r8, r8, lspush #\push |
166 | orr r8, r8, r6, lspull #\pull |
167 | mov r6, r6, lspush #\push |
168 | orr r6, r6, r5, lspull #\pull |
169 | mov r5, r5, lspush #\push |
170 | orr r5, r5, r4, lspull #\pull |
171 | mov r4, r4, lspush #\push |
172 | orr r4, r4, r3, lspull #\pull |
173 | stmdb r0!, {r4 - r6, r8 - r10, ip, lr} |
174 | bge 12b |
175 | PLD( cmn r2, #96 ) |
176 | PLD( bge 13b ) |
177 | |
/* Restore the registers pushed at 11. */
178 | ldmfd sp!, {r5, r6, r8 - r10} |
179 | |
/* 14: ip = bytes still to copy as whole words (r2 & 28); none -> 16. */
180 | 14: ands ip, r2, #28 |
181 | beq 16f |
182 | |
/*
 * 15: one output word per iteration, counting ip down by 4. The bgt uses
 * the flags from the subs (orr and str do not set flags). Under CALGN, once
 * this loop ends and r2 is still non-negative, control returns to the bulk
 * loop at 11.
 */
183 | 15: mov lr, r3, lspush #\push |
184 | ldr r3, [r1, #-4]! |
185 | subs ip, ip, #4 |
186 | orr lr, lr, r3, lspull #\pull |
187 | str lr, [r0, #-4]! |
188 | bgt 15b |
189 | CALGN( cmp r2, #0 ) |
190 | CALGN( bge 11b ) |
191 | |
/*
 * 16: add pull/8 bytes back to r1. For each expansion below this equals
 * the (src & 3) value that the bic at label 10 cleared, so r1 is a byte
 * pointer again for the tail at label 8.
 */
192 | 16: add r1, r1, #(\pull / 8) |
193 | b 8b |
194 | |
195 | .endm |
196 | |
197 | |
/* Fall-through case from label 10: src & 3 == 3. */
198 | backward_copy_shift push=8 pull=24 |
199 | |
/* 17: src & 3 == 2. */
200 | 17: backward_copy_shift push=16 pull=16 |
201 | |
/* 18: src & 3 == 1. */
202 | 18: backward_copy_shift push=24 pull=8 |
203 | |
204 | UNWIND( .fnend ) |
205 | ENDPROC(memmove) |
206 | ENDPROC(__memmove) |
207 | |