/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/errno.h>

/*
 * err1 - tag the instruction that follows on the same line ("err1; insn").
 *
 * Drops local label 100 at the address of that instruction and passes
 * (100b, .Ldo_err1) to EX_TABLE.  EX_TABLE is not defined in this file;
 * presumably it records an exception-table entry so that a fault at 100b
 * resumes at .Ldo_err1 - confirm against the included headers.
 *
 * Use only where no stack frame is active: .Ldo_err1 does not pop one.
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

/*
 * err2 - tag the instruction that follows on the same line ("err2; insn").
 *
 * Drops local label 200 at the address of that instruction and passes
 * (200b, .Ldo_err2) to EX_TABLE.  EX_TABLE is not defined in this file;
 * presumably it records an exception-table entry so that a fault at 200b
 * resumes at .Ldo_err2 - confirm against the included headers.
 *
 * Use while the stack frame holding r14-r22 is live: .Ldo_err2 reloads
 * those registers and pops the frame before retrying byte by byte.
 */
	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

/*
 * err3 - tag the instruction that follows on the same line ("err3; insn").
 *
 * Drops local label 300 at the address of that instruction and passes
 * (300b, .Ldone) to EX_TABLE.  EX_TABLE is not defined in this file;
 * presumably it records an exception-table entry so that a fault at 300b
 * resumes at .Ldone - confirm against the included headers.
 *
 * Used inside the byte-by-byte retry loop, where CTR holds the number of
 * bytes not yet copied.
 */
	.macro err3
300:	EX_TABLE(300b,.Ldone)
	.endm

/*
 * Fixup paths for faults taken inside copy_mc_generic.
 *
 * .Ldo_err2: target of err2-tagged instructions, i.e. a fault taken while
 *            the stack frame is live.  Reloads r14-r22 from the frame,
 *            pops it, then falls through to .Ldo_err1.
 * .Ldo_err1: target of err1-tagged instructions (no frame active).
 *
 * On entry to .Ldo_err1:
 *   r3 = destination of the first byte not yet copied
 *   r4 = source of the first byte not yet copied
 *   r7 = number of bytes not yet copied (loaded into CTR; must be non-zero,
 *        since bdnz decrements before testing)
 *
 * The remainder is retried one byte at a time.  If that retry faults, err3
 * diverts to .Ldone, which returns the bytes still outstanding.  If the
 * retry runs to completion, r3 = 0 is returned.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	li	r3,0		/* whole remainder copied after all */
	blr

/*
 * .Ldone - fixup target of err3-tagged instructions in the byte-by-byte
 * retry loop.  CTR has not yet been decremented for the byte that faulted,
 * so it equals the number of bytes left uncopied; return it in r3.
 */
.Ldone:
	mfctr	r3
	blr


/*
 * copy_mc_generic - copy memory, recovering from faults on tagged accesses
 *
 * In:   r3 = destination
 *       r4 = source
 *       r5 = number of bytes
 * Out:  r3 = 0 if every byte was copied; otherwise the fixup code
 *       (.Ldo_err1 / .Ldo_err2 / .Ldone) returns the number of bytes
 *       that were not copied.
 *
 * Registers used: r0, r6-r12 as scratch; r14-r22 are saved to and restored
 * from a STACKFRAMESIZE frame around the bulk copy; CTR, cr0 and cr7.
 * r7 tracks the number of bytes still to copy for the fixup code.
 *
 * Invariant relied on by the fixups: at every err1/err2-tagged instruction
 * r3, r4 and r7 all describe the start of the chunk being copied.  For
 * that reason r4 and r3 are advanced, and r7 reduced, only after all of a
 * chunk's loads AND stores have completed.  Advancing r4 between the loads
 * and the stores would leave the source pointer ahead of the destination
 * pointer when a store faults, and the byte-by-byte retry would then copy
 * from the wrong source offset.
 *
 * mtocrf 0x01,rX copies the low four bits of rX into cr7; cr7*4+0..3 then
 * test the bits worth 8, 4, 2 and 1 respectively.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = bytes remaining */
	cmpldi	r5,16
	blt	.Lshort_copy

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* cr7 = low bits of -src */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to align (0..7) */

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)
	addi	r4,r4,1
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* r5 = length after alignment */
	cmpldi	r5,128			/* cr0 consumed by "blt 5f" below */

	/* Frame for r14-r22; none of these instructions alters cr0 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* fewer than 128B: skip the loop */
	srdi	r6,r5,7
	mtctr	r6			/* CTR = number of 128B chunks */

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r4,r4,128
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 = length mod 128 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 bits now select 64/32/16B chunks */

6:	bf	cr7*4+1,7f
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r4,r4,64
	addi	r3,r3,64
	subi	r7,r7,64

	/* Done with r14-r22: restore them and drop the frame (err1 from here) */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r4,r4,32
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r4,r4,16
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 = length mod 16 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* cr7 bits now select 8/4/2/1B pieces */
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r4,r4,8
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)	/* last byte: no pointer/count update needed */

15:	li	r3,0			/* success: nothing left uncopied */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);


/* Source: linux/arch/powerpc/lib/copy_mc_64.S */