1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * PARISC TLB and cache flushing support |
4 | * Copyright (C) 2000-2001 Hewlett-Packard (John Marvin) |
5 | * Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org) |
6 | * Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org) |
7 | */ |
8 | |
9 | /* |
10 | * NOTE: fdc,fic, and pdc instructions that use base register modification |
11 | * should only use index and base registers that are not shadowed, |
12 | * so that the fast path emulation in the non access miss handler |
13 | * can be used. |
14 | */ |
15 | |
16 | #ifdef CONFIG_64BIT |
17 | .level 2.0w |
18 | #else |
19 | .level 2.0 |
20 | #endif |
21 | |
22 | #include <asm/psw.h> |
23 | #include <asm/assembly.h> |
24 | #include <asm/cache.h> |
25 | #include <asm/ldcw.h> |
26 | #include <asm/alternative.h> |
27 | #include <linux/linkage.h> |
28 | #include <linux/init.h> |
29 | #include <linux/pgtable.h> |
30 | |
31 | .section .text.hot |
32 | .align 16 |
33 | |
ENTRY_CFI(flush_tlb_all_local)
	/*
	 * Flush the entire local instruction and data TLB, using the
	 * space/offset base, stride and count geometry published in the
	 * cache_info structure.
	 *
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there needs to be no intervening
	 * tlb operations, e.g. tlb misses, so the operation needs
	 * to happen in real mode with all interruptions disabled.
	 */

	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1		/* physical address to resume at */
	nop					/* nops preserve the instruction */
	nop					/* spacing needed around ssm/rsm */
	nop					/* for the pcxt SSM erratum */
	nop					/* referenced above */
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi					/* enter real mode at 1: */
	nop

1:	load32		PA(cache_info), %r1	/* now executing in real mode */

	/* Flush Instruction Tlb */

88:	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		%r0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */

fitdone:
	/* Patch the whole I-TLB flush (88b..fitdone) to nops when the
	 * ALT_COND_NO_SPLIT_TLB condition applies (no separate I-TLB). */
	ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)

	/* Flush Data Tlb */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		%r0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22,fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */


fdtdone:
	/*
	 * Switch back to virtual mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1			/* virtual address to resume at */
	nop					/* erratum spacing, as above */
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* I-bit to state on entry */
	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
	rfi
	nop

2:	bv		%r0(%r2)		/* return to caller */
	nop

	/*
	 * When running in qemu, drop whole flush_tlb_all_local function and
	 * replace by one pdtlbe instruction, for which QEMU will drop all
	 * local TLB entries.
	 */
3:	pdtlbe		%r0(%sr1,%r0)
	bv,n		%r0(%r2)
	ALTERNATIVE_CODE(flush_tlb_all_local, 2, ALT_COND_RUN_ON_QEMU, 3b)
ENDPROC_CFI(flush_tlb_all_local)
187 | |
188 | .import cache_info,data |
189 | |
ENTRY_CFI(flush_instruction_cache_local)
	/*
	 * Flush the entire local instruction cache, using the base,
	 * stride, count and loop values from cache_info.  Interrupts
	 * are disabled across the flush loops (saved I-bit in %r22,
	 * restored at fisync).  Patched to a nop (88b..89b) when the
	 * ALT_COND_NO_ICACHE condition applies.
	 */
88:	load32		cache_info, %r1

	/* Flush Instruction Cache */

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */

fimanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
	fice            %r0(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */

fioneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fice instruction */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2

fioneloop1:
	/* 16-way unrolled: each iteration flushes 16 lines */
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)	/* 16th flush, in delay slot */

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

fioneloop2:
	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_instruction_cache_local)
248 | |
249 | |
250 | .import cache_info, data |
ENTRY_CFI(flush_data_cache_local)
	/*
	 * Flush the entire local data cache, using the base, stride,
	 * count and loop values from cache_info.  Mirrors
	 * flush_instruction_cache_local but uses fdce.  Interrupts are
	 * disabled across the flush loops (saved I-bit in %r22,
	 * restored at fdsync).  Patched to a nop (88b..89b) when the
	 * ALT_COND_NO_DCACHE condition applies.
	 */
88:	load32		cache_info, %r1

	/* Flush Data Cache */

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */

fdmanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
	fdce		%r0(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */

fdoneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fdce instruction */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2

fdoneloop1:
	/* 16-way unrolled: each iteration flushes 16 lines */
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)	/* 16th flush, in delay slot */

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

fdoneloop2:
	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */

fdsync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_data_cache_local)
309 | |
310 | /* Clear page using kernel mapping. */ |
311 | |
ENTRY_CFI(clear_page_asm)
	/*
	 * clear_page_asm(void *page)
	 *
	 * Zero one page through the kernel mapping.
	 * In: %r26 = page address.
	 */
#ifdef CONFIG_64BIT

	/* Unroll the loop: 16 x std clears 128 bytes per iteration. */
	ldi		(PAGE_SIZE / 128), %r1

1:
	std		%r0, 0(%r26)
	std		%r0, 8(%r26)
	std		%r0, 16(%r26)
	std		%r0, 24(%r26)
	std		%r0, 32(%r26)
	std		%r0, 40(%r26)
	std		%r0, 48(%r26)
	std		%r0, 56(%r26)
	std		%r0, 64(%r26)
	std		%r0, 72(%r26)
	std		%r0, 80(%r26)
	std		%r0, 88(%r26)
	std		%r0, 96(%r26)
	std		%r0, 104(%r26)
	std		%r0, 112(%r26)
	std		%r0, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26		/* advance to next 128B chunk */

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32 bit kernel.
	 * 16 x stw clears 64 bytes per iteration.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	stw		%r0, 0(%r26)
	stw		%r0, 4(%r26)
	stw		%r0, 8(%r26)
	stw		%r0, 12(%r26)
	stw		%r0, 16(%r26)
	stw		%r0, 20(%r26)
	stw		%r0, 24(%r26)
	stw		%r0, 28(%r26)
	stw		%r0, 32(%r26)
	stw		%r0, 36(%r26)
	stw		%r0, 40(%r26)
	stw		%r0, 44(%r26)
	stw		%r0, 48(%r26)
	stw		%r0, 52(%r26)
	stw		%r0, 56(%r26)
	stw		%r0, 60(%r26)

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26		/* advance to next 64B chunk */
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_page_asm)
372 | |
373 | /* Copy page using kernel mapping. */ |
374 | |
ENTRY_CFI(copy_page_asm)
	/*
	 * copy_page_asm(void *to, void *from)
	 *
	 * Copy one page through the kernel mapping.
	 * In: %r26 = destination page, %r25 = source page.
	 * Loads and stores are interleaved by hand; do not reorder.
	 */
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	ldi		(PAGE_SIZE / 128), %r1	/* 128 bytes per iteration */

1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25		/* advance source pointer */
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26		/* advance dest pointer */

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */
	ldw		0(%r25), %r19		/* prime first load */
	ldi		(PAGE_SIZE / 64), %r1	/* 64 bytes per iteration */

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25		/* advance source pointer */
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26		/* advance dest pointer */
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19		/* prime next iteration's load */
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_page_asm)
483 | |
484 | /* |
485 | * NOTE: Code in clear_user_page has a hard coded dependency on the |
486 | * maximum alias boundary being 4 Mb. We've been assured by the |
487 | * parisc chip designers that there will not ever be a parisc |
488 | * chip with a larger alias boundary (Never say never :-) ). |
489 | * |
490 | * Yah, what about the PA8800 and PA8900 processors? |
491 | * |
492 | * Subtle: the dtlb miss handlers support the temp alias region by |
493 | * "knowing" that if a dtlb miss happens within the temp alias |
494 | * region it must have occurred while in clear_user_page. Since |
495 | * this routine makes use of processor local translations, we |
496 | * don't want to insert them into the kernel page table. Instead, |
497 | * we load up some general registers (they need to be registers |
498 | * which aren't shadowed) with the physical page numbers (preshifted |
499 | * for tlb insertion) needed to insert the translations. When we |
500 | * miss on the translation, the dtlb miss handler inserts the |
501 | * translation into the tlb using these values: |
502 | * |
503 | * %r26 physical address of "to" translation |
504 | * %r23 physical address of "from" translation |
505 | */ |
506 | |
507 | /* |
508 | * copy_user_page_asm() performs a page copy using mappings |
509 | * equivalent to the user page mappings. It can be used to |
510 | * implement copy_user_page() but unfortunately both the `from' |
511 | * and `to' pages need to be flushed through mappings equivalent |
512 | * to the user mappings after the copy because the kernel accesses |
513 | * the `from' page through the kmap kernel mapping and the `to' |
514 | * page needs to be flushed since code can be copied. As a |
515 | * result, this implementation is less efficient than the simpler |
516 | * copy using the kernel mapping. It only needs the `from' page |
 * to be flushed via the user mapping. The kunmap routines handle
518 | * the flushes needed for the kernel mapping. |
519 | * |
520 | * I'm still keeping this around because it may be possible to |
521 | * use it if more information is passed into copy_user_page(). |
522 | * Have to do some measurements to see if it is worthwhile to |
523 | * lobby for such a change. |
524 | * |
525 | */ |
526 | |
ENTRY_CFI(copy_user_page_asm)
	/*
	 * copy_user_page_asm(void *to, void *from, unsigned long vaddr)
	 *
	 * Copy one page through the temporary alias region.
	 * In:  %r26 = 'to' page, %r25 = 'from' page, %r24 = user virtual
	 *      address whose low bits select the alias color.
	 * The physical addresses left in %r26 ('to') and %r23 ('from')
	 * are consumed by the dtlb miss handler for the alias region
	 * (see the tmpalias comment block earlier in this file).
	 */
	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26		/* %r26 = physical 'to' */
	sub		%r25, %r1, %r23		/* %r23 = physical 'from' */

	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r24, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */
	copy		%r28, %r29
	depi_safe	1, 31-TMPALIAS_SIZE_BITS,1, %r29	/* Form aliased virtual address 'from' */

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19		/* prime first load */
	ldi		(PAGE_SIZE / 128), %r1	/* 128 bytes per iteration */

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29		/* advance source pointer */
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28		/* advance dest pointer */

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1	/* 64 bytes per iteration */

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28		/* advance dest pointer */

	addib,COND(>)		-1, %r1,1b
	ldo		64(%r29), %r29		/* advance source pointer */
#endif

	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_user_page_asm)
663 | |
ENTRY_CFI(clear_user_page_asm)
	/*
	 * clear_user_page_asm(void *page, unsigned long vaddr)
	 *
	 * Zero one page through the temporary alias region.
	 * In:  %r26 = page address (converted to physical below, used
	 *      by the dtlb miss handler for the alias translation),
	 *      %r25 = user virtual address whose low bits select the
	 *      alias color.
	 */
	tophys_r1	%r26			/* %r26 = physical page addr */

	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1	/* 16 x std = 128B/iteration */

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:	std		%r0, 0(%r28)
	std		%r0, 8(%r28)
	std		%r0, 16(%r28)
	std		%r0, 24(%r28)
	std		%r0, 32(%r28)
	std		%r0, 40(%r28)
	std		%r0, 48(%r28)
	std		%r0, 56(%r28)
	std		%r0, 64(%r28)
	std		%r0, 72(%r28)
	std		%r0, 80(%r28)
	std		%r0, 88(%r28)
	std		%r0, 96(%r28)
	std		%r0, 104(%r28)
	std		%r0, 112(%r28)
	std		%r0, 120(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28		/* advance alias pointer */

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1	/* 16 x stw = 64B/iteration */

1:	stw		%r0, 0(%r28)
	stw		%r0, 4(%r28)
	stw		%r0, 8(%r28)
	stw		%r0, 12(%r28)
	stw		%r0, 16(%r28)
	stw		%r0, 20(%r28)
	stw		%r0, 24(%r28)
	stw		%r0, 28(%r28)
	stw		%r0, 32(%r28)
	stw		%r0, 36(%r28)
	stw		%r0, 40(%r28)
	stw		%r0, 44(%r28)
	stw		%r0, 48(%r28)
	stw		%r0, 52(%r28)
	stw		%r0, 56(%r28)
	stw		%r0, 60(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28		/* advance alias pointer */
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_user_page_asm)
731 | |
ENTRY_CFI(flush_dcache_page_asm)
	/*
	 * flush_dcache_page_asm(unsigned long phys, unsigned long vaddr)
	 *
	 * Flush one page from the data cache through a temporary alias
	 * mapping.
	 * In:  %r26 = physical page address (consumed by the dtlb miss
	 *      handler for the alias translation — see the tmpalias
	 *      comment block earlier in this file), %r25 = user virtual
	 *      address whose low bits select the alias color.
	 * Note: uses %r31 (with the % prefix, matching the rest of this
	 * file) as the cache line stride.
	 */
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31	/* %r31 = line stride */

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = PAGE_SIZE */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25		/* %r25 = last line of page */

	/* 16-way unrolled flush loop over the aliased page */
1:	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b		/* predict taken */
	fdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_dcache_page_asm)
780 | |
ENTRY_CFI(purge_dcache_page_asm)
	/*
	 * purge_dcache_page_asm(unsigned long phys, unsigned long vaddr)
	 *
	 * Purge (invalidate without writeback, pdc) one page from the
	 * data cache through a temporary alias mapping.  Mirrors
	 * flush_dcache_page_asm, which writes dirty lines back (fdc).
	 * In:  %r26 = physical page address (consumed by the dtlb miss
	 *      handler for the alias translation), %r25 = user virtual
	 *      address whose low bits select the alias color.
	 * Note: uses %r31 (with the % prefix, matching the rest of this
	 * file) as the cache line stride.
	 */
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31	/* %r31 = line stride */

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = PAGE_SIZE */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25		/* %r25 = last line of page */

	/* 16-way unrolled purge loop over the aliased page */
1:	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b		/* predict taken */
	pdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)
829 | |
ENTRY_CFI(flush_icache_page_asm)
	/*
	 * flush_icache_page_asm(unsigned long phys, unsigned long vaddr)
	 *
	 * Flush one page from the instruction cache through a temporary
	 * alias mapping.
	 * In:  %r26 = physical page address (consumed by the tlb miss
	 *      handler for the alias translation), %r25 = user virtual
	 *      address whose low bits select the alias color.
	 */
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l         %r0(%sr4,%r28)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
0:	pdtlb		%r0(%r28)
1:	pitlb           %r0(%sr4,%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31	/* %r31 = line stride */

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = PAGE_SIZE */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25		/* %r25 = last line of page */

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4.
	 * 16-way unrolled flush loop over the aliased page. */
1:	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(>>)	%r25, %r28, 1b		/* predict taken */
	fic,m		%r31(%sr4,%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_icache_page_asm)
888 | |
ENTRY_CFI(flush_kernel_dcache_page_asm)
	/*
	 * flush_kernel_dcache_page_asm(void *addr)
	 *
	 * Flush (writeback, fdc) one page from the data cache through
	 * the kernel mapping.
	 * In: %r26 = address within the page (offset bits cleared below).
	 * Patched to a nop (88b..89b) when ALT_COND_NO_DCACHE applies.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = line stride */
	depi_safe	0, 31,PAGE_SHIFT, %r26	/* Clear any offset bits */

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = PAGE_SIZE */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25		/* %r25 = last line of page */

	/* 16-way unrolled flush loop */
1:      fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b		/* predict taken */
	fdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)
925 | |
ENTRY_CFI(purge_kernel_dcache_page_asm)
	/*
	 * purge_kernel_dcache_page_asm(void *addr)
	 *
	 * Purge (invalidate without writeback, pdc) one page from the
	 * data cache through the kernel mapping.  Mirrors
	 * flush_kernel_dcache_page_asm, which writes dirty lines back.
	 * In: %r26 = address within the page (offset bits cleared below).
	 * Patched to a nop (88b..89b) when ALT_COND_NO_DCACHE applies.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = line stride */
	depi_safe	0, 31,PAGE_SHIFT, %r26	/* Clear any offset bits */

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = PAGE_SIZE */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25		/* %r25 = last line of page */

	/* 16-way unrolled purge loop */
1:      pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b		/* predict taken */
	pdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)
962 | |
ENTRY_CFI(flush_user_dcache_range_asm)
	/*
	 * flush_user_dcache_range_asm(unsigned long start, unsigned long end)
	 *
	 * Flush a user-space (%sr3) address range from the data cache.
	 * In: %r26 = start address, %r25 = end address.
	 * Start is aligned down to a cache line; a 16-way unrolled loop
	 * runs while at least 16 lines remain, then a single-line loop
	 * (2:) finishes the tail.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = line stride */
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26	/* align start down to a line */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21	/* %r21 = stride * 16 */
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f		/* predict not taken */
1:	add		%r22, %r21, %r22	/* %r22 = end of next 16-line chunk */
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b		/* predict taken */
	fdc,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b		/* tail: one line at a time */
	fdc,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)
1003 | |
ENTRY_CFI(flush_kernel_dcache_range_asm)
	/*
	 * flush_kernel_dcache_range_asm(unsigned long start, unsigned long end)
	 *
	 * Flush a kernel address range from the data cache.  Same loop
	 * structure as flush_user_dcache_range_asm, but without the
	 * %sr3 space qualifier.
	 * In: %r26 = start address, %r25 = end address.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = line stride */
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26	/* align start down to a line */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21	/* %r21 = stride * 16 */
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f		/* predict not taken */
1:	add		%r22, %r21, %r22	/* %r22 = end of next 16-line chunk */
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b		/* predict taken */
	fdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b		/* predict taken */
	fdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)
1044 | |
/*
 * purge_kernel_dcache_range_asm(unsigned long start, unsigned long end)
 *
 * Purge (pdc - invalidate without write-back) the kernel data cache over
 * the byte range start..end.  Same structure as
 * flush_kernel_dcache_range_asm, with pdc replacing fdc.
 * In:      %r26 = start, %r25 = end
 * Scratch: %r1, %r21, %r22, %r23; returns through %r2.
 * As in the flush variant, the sync is inside the 88..89 region and is
 * NOP-patched away on CPUs without a D-cache.
 */
ENTRY_CFI(purge_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = D-cache line stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 (alignment mask) */
	ANDCM		%r26, %r21, %r26		/* align start down to a line boundary */

	/* %r21 = stride * 16 = bytes covered by one unrolled loop pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22		/* %r22 = address after first full pass */
	cmpb,COND(>>),n	%r22, %r25, 2f		/* predict not taken */
1:	add		%r22, %r21, %r22		/* pre-advance pass-end for next iteration */
	pdc,m	%r23(%r26)			/* purge one line, %r26 += stride */
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	pdc,m	%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b		/* predict taken */
	pdc,m	%r23(%r26)			/* 16th purge in the branch delay slot */

	/* purge any remaining lines one at a time */
2:	cmpb,COND(>>),n	%r25, %r26, 2b		/* predict taken */
	pdc,m	%r23(%r26)

	sync					/* wait for the purges to complete */
89:		ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)
1085 | |
/*
 * flush_user_icache_range_asm(unsigned long start, unsigned long end)
 *
 * Flush (fic) the instruction cache over the byte range start..end,
 * accessed through user space register %sr3.
 * In:      %r26 = start, %r25 = end
 * Scratch: %r1, %r21, %r22, %r23; returns through %r2.
 * The whole 88..89 region is patched to NOPs at boot when the CPU
 * has no instruction cache (ALT_COND_NO_ICACHE).
 */
ENTRY_CFI(flush_user_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = I-cache line stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 (alignment mask) */
	ANDCM		%r26, %r21, %r26		/* align start down to a line boundary */

	/* %r21 = stride * 16 = bytes covered by one unrolled loop pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22		/* %r22 = address after first full pass */
	cmpb,COND(>>),n	%r22, %r25, 2f		/* predict not taken */
1:	add		%r22, %r21, %r22		/* pre-advance pass-end for next iteration */
	fic,m	%r23(%sr3, %r26)		/* flush one line, %r26 += stride */
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	fic,m	%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b		/* predict taken */
	fic,m	%r23(%sr3, %r26)		/* 16th flush in the branch delay slot */

	/* flush any remaining lines one at a time (delay slot nullified on exit) */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m	%r23(%sr3, %r26)

89:		ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync					/* wait for the flushes to complete */
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_icache_range_asm)
1126 | |
/*
 * flush_kernel_icache_page(void *vaddr)
 *
 * Flush (fic) one full page of the instruction cache through %sr4
 * (kernel space).
 * In:      %r26 = page-aligned virtual address
 * Scratch: %r1, %r23, %r25; returns through %r2.
 * NOTE(review): the loop is unrolled 16x with no remainder handling,
 * so PAGE_SIZE is assumed to be a multiple of 16 cache lines - confirm
 * against the supported icache_stride values.
 */
ENTRY_CFI(flush_kernel_icache_page)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = I-cache line stride */

	/* %r25 = 1 << PAGE_SHIFT = PAGE_SIZE */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25	/* %r25 = address just past the page */
	sub		%r25, %r23, %r25	/* %r25 = address of the last line */


1:	fic,m	%r23(%sr4, %r26)		/* flush one line, %r26 += stride */
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	cmpb,COND(>>)	%r25, %r26, 1b		/* predict taken */
	fic,m	%r23(%sr4, %r26)		/* 16th flush in the branch delay slot */

89:		ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)	/* NOPs if no I-cache */
	sync					/* wait for the flushes to complete */
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_page)
1163 | |
/*
 * flush_kernel_icache_range_asm(unsigned long start, unsigned long end)
 *
 * Flush (fic) the instruction cache over the byte range start..end
 * through %sr4 (kernel space).
 * In:      %r26 = start, %r25 = end
 * Scratch: %r1, %r21, %r22, %r23; returns through %r2.
 * The whole 88..89 region is patched to NOPs at boot when the CPU
 * has no instruction cache (ALT_COND_NO_ICACHE).
 */
ENTRY_CFI(flush_kernel_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = I-cache line stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 (alignment mask) */
	ANDCM		%r26, %r21, %r26		/* align start down to a line boundary */

	/* %r21 = stride * 16 = bytes covered by one unrolled loop pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22		/* %r22 = address after first full pass */
	cmpb,COND(>>),n	%r22, %r25, 2f		/* predict not taken */
1:	add		%r22, %r21, %r22		/* pre-advance pass-end for next iteration */
	fic,m	%r23(%sr4, %r26)		/* flush one line, %r26 += stride */
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	fic,m	%r23(%sr4, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b		/* predict taken */
	fic,m	%r23(%sr4, %r26)		/* 16th flush in the branch delay slot */

	/* flush any remaining lines one at a time */
2:	cmpb,COND(>>),n	%r25, %r26, 2b		/* predict taken */
	fic,m	%r23(%sr4, %r26)

89:		ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync					/* wait for the flushes to complete */
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_range_asm)
1204 | |
1205 | .text |
1206 | |
1207 | /* align should cover use of rfi in disable_sr_hashing_asm and |
1208 | * srdis_done. |
1209 | */ |
1210 | .align 256 |
ENTRY_CFI(disable_sr_hashing_asm)
	/*
	 * disable_sr_hashing_asm(int srhash_type)
	 *
	 * Disable space-register hashing by rewriting the CPU's diagnose
	 * registers.  %r26 selects the dispatch target for the CPU family
	 * (SRHASH_PCXST / SRHASH_PCXL / SRHASH_PA20).  The diagnose
	 * registers are touched in real mode, so we rfi into real mode,
	 * do the work, and rfi back to virtual mode before returning.
	 */

	/*
	 * Switch to real mode
	 */
	/* pcxt_ssm_bug - the nops pad out the rsm per PA 2.0 errata */
	rsm		PSW_SM_I, %r0		/* disable interrupts */
	load32		PA(1f), %r1		/* physical address to resume at */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi					/* jump to 1f in real mode */
	nop

	/* now in real mode: dispatch on CPU family */
1:	cmpib,=,n	SRHASH_PCXST, %r26,srdis_pcxs
	cmpib,=,n	SRHASH_PCXL, %r26,srdis_pcxl
	cmpib,=,n	SRHASH_PA20, %r26,srdis_pa20
	b,n		srdis_done		/* unknown type: nothing to do */

srdis_pcxs:

	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */
	/* mfdiag/mtdiag are not known to the assembler, hence raw .word */

	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
	.word		0x141c1a00		/* must issue twice */
	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
	.word		0x141c1600		/* mtdiag %r28, %dr0 */
	.word		0x141c1600		/* must issue twice */
	b,n		srdis_done

srdis_pcxl:

	/* Disable Space Register Hashing for PCXL */

	.word		0x141c0600		/* mfdiag %dr0, %r28 */
	depwi		0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
	.word		0x141c0240		/* mtdiag %r28, %dr0 */
	b,n		srdis_done

srdis_pa20:

	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */

	.word		0x144008bc		/* mfdiag %dr2, %r28 */
	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
	.word		0x145c1840		/* mtdiag %r28, %dr2 */

	/* fall through to srdis_done */

srdis_done:
	/* Switch back to virtual mode */
	rsm		PSW_SM_I, %r0		/* pcxt_ssm_bug: padded rsm, see above */
	load32		2f, %r1			/* virtual address to resume at */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	mtctl		%r1, %ipsw
	rfi					/* jump to 2f in virtual mode */
	nop

2:	bv		%r0(%r2)
	nop
ENDPROC_CFI(disable_sr_hashing_asm)
1294 | |
1295 | .end |
1296 | |