1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Copyright (C) IBM Corporation, 2011 |
4 | * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com> |
5 | * Author - Balbir Singh <bsingharora@gmail.com> |
6 | */ |
7 | #include <linux/export.h> |
8 | #include <asm/ppc_asm.h> |
9 | #include <asm/errno.h> |
10 | |
/*
 * err1: cover the instruction that follows the macro invocation with an
 * exception-table entry whose fixup is .Ldo_err1.  Used for accesses made
 * while NO stack frame is active (r14-r22 not saved), so recovery can go
 * straight to the byte-by-byte retry without unwinding anything.
 */
.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
.endm
15 | |
/*
 * err2: like err1, but the fixup is .Ldo_err2.  Used for accesses made
 * while the non-volatile regs r14-r22 are live on our stack frame, so the
 * recovery path must first restore them and pop the frame.
 */
.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
.endm
20 | |
/*
 * err3: fixup is .Ldone.  Used only inside the recovery byte-copy loop:
 * a second fault there terminates the copy, and .Ldone reports how many
 * bytes were left (the CTR value) to the caller.
 */
.macro err3
300:	EX_TABLE(300b,.Ldone)
.endm
24 | |
/*
 * Fault-recovery paths.
 *
 * Throughout the main body, r7 holds the number of bytes not yet known
 * to be copied (it is decremented only after each chunk completes), so
 * on any fault it is a safe upper bound on the remaining work.
 *
 * .Ldo_err2: fault taken while r14-r22 were saved on our stack frame --
 *            restore them, pop the frame, then fall through.
 * .Ldo_err1: retry the outstanding r7 bytes one at a time to pin down
 *            the exact failing byte.  If the loop completes, everything
 *            was copied after all and we return 0.
 * .Ldone:    a fault inside the byte loop (err3) lands here; CTR still
 *            holds the count of bytes not copied, which we return.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	li	r3,0			/* whole remainder copied: return 0 */
	blr

.Ldone:
	mfctr	r3			/* return number of bytes NOT copied */
	blr
51 | |
52 | |
/*
 * unsigned long copy_mc_generic(dest=r3, src=r4, size=r5)
 *
 * Machine-check-tolerant memcpy: every load and store is covered by an
 * exception-table entry (err1/err2 macros), so a fault on either the
 * source or the destination is recovered rather than fatal.
 *
 * Returns 0 on complete success, otherwise the number of bytes that
 * were NOT copied (computed by the byte-retry path in .Ldo_err1/.Ldone).
 *
 * r7 is maintained as the count of bytes not yet known to be copied;
 * it is decremented only AFTER each chunk's stores complete, so the
 * recovery code can always trust it.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = bytes outstanding */
	cmpldi	r5,16
	blt	.Lshort_copy		/* < 16 bytes: skip alignment/bulk */

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4			/* low bits of -src = bytes to alignment */
	mtocrf	0x01,r6			/* low nibble of r6 -> cr7 for bit tests */
	clrldi	r6,r6,(64-3)		/* r6 = (-src) & 7 = alignment byte count */

	bf	cr7*4+3,1f		/* bit value 1: copy 1 byte */
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* bit value 2: copy 2 bytes */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* bit value 4: copy 4 bytes */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* account for the alignment bytes */
	cmpldi	r5,128			/* compare now; branch after frame setup */

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)	/* frame for non-volatile r14-r22 */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1) /* stash LR in caller's frame (no
					  * calls are made, so it is never
					  * reloaded here) */

	blt	5f			/* < 128 bytes left: skip bulk loop */
	srdi	r6,r5,7			/* r6 = number of 128B chunks */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)		/* 16 doublewords in flight per pass */
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r3,r3,128
	subi	r7,r7,128		/* chunk fully stored: shrink count */
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 %= 128 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4			/* r6 = remaining 16B units (0..7) */
	mtocrf	0x01,r6			/* -> cr7 for the power-of-two tests */

6:	bf	cr7*4+1,7f		/* bit value 4: >= 64 bytes remain */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
	addi	r4,r4,64
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r3,r3,64
	subi	r7,r7,64

7:	ld	r14,STK_REG(R14)(r1)	/* non-volatiles no longer needed: */
	ld	r15,STK_REG(R15)(r1)	/* restore and pop the frame, so   */
	ld	r16,STK_REG(R16)(r1)	/* err1 (frameless fixup) is used  */
	ld	r17,STK_REG(R17)(r1)	/* from here on                    */
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f		/* bit value 2: >= 32 bytes remain */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* bit value 1: >= 16 bytes remain */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 %= 16 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* low nibble of count -> cr7 */
	bf	cr7*4+0,12f		/* bit value 8: copy 8 bytes */
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f		/* bit value 4: copy 4 bytes */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f		/* bit value 2: copy 2 bytes */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f		/* bit value 1: copy last byte */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0			/* everything copied: return 0 */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);
243 | |