1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * linux/arch/arm/mm/proc-xsc3.S |
4 | * |
5 | * Original Author: Matthew Gilbert |
6 | * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> |
7 | * |
8 | * Copyright 2004 (C) Intel Corp. |
9 | * Copyright 2005 (C) MontaVista Software, Inc. |
10 | * |
11 | * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is |
12 | * an extension to Intel's original XScale core that adds the following |
13 | * features: |
14 | * |
15 | * - ARMv6 Supersections |
16 | * - Low Locality Reference pages (replaces mini-cache) |
17 | * - 36-bit addressing |
18 | * - L2 cache |
19 | * - Cache coherency if chipset supports it |
20 | * |
21 | * Based on original XScale code by Nicolas Pitre. |
22 | */ |
23 | |
24 | #include <linux/linkage.h> |
25 | #include <linux/init.h> |
26 | #include <linux/pgtable.h> |
27 | #include <asm/assembler.h> |
28 | #include <asm/hwcap.h> |
29 | #include <asm/pgtable-hwdef.h> |
30 | #include <asm/page.h> |
31 | #include <asm/ptrace.h> |
32 | #include "proc-macros.S" |
33 | |
/*
 * Ranges of at least this many bytes are not flushed line by line;
 * the range flush falls back to flushing the whole cache instead.
 */
#define MAX_AREA_SIZE	(32 * 1024)

/*
 * Line size, in bytes, shared by the L1 I, L1 D and unified L2 caches.
 */
#define CACHELINESIZE	32

/*
 * Capacity of the L1 D cache, in bytes.
 */
#define CACHESIZE	(32 * 1024)
49 | |
/*
 * cpwait_ret lr, rd
 *
 * Return through \lr, but only once the preceding CP15 operation has
 * completed.  \rd is scratch and is clobbered.
 */
	.macro	cpwait_ret, lr, rd
	mrc	p15, 0, \rd, c2, c0, 0		@ dummy read of cp15
	sub	pc, \lr, \rd, LSR #32		@ \rd LSR #32 is zero, so this is
						@ pc = \lr; it depends on the read
						@ above, and writing pc flushes the
						@ instruction pipeline
	.endm
60 | |
/*
 * clean_d_cache rd, rs
 *
 * Clean and invalidate the entire L1 D cache, one line per coprocessor
 * write.  \rd is clobbered and the condition flags are changed; \rs is
 * accepted for the caller's benefit but is not used.
 */
	.macro	clean_d_cache rd, rs
	mov	\rd, #0x00e0
	orr	\rd, \rd, #0x1f00		@ rd = 0x1fe0, the first line selector
1:	mcr	p15, 0, \rd, c7, c14, 2		@ clean/invalidate L1 D line
	adds	\rd, \rd, #0x40000000		@ step bits [31:30] ...
	bcc	1b				@ ... until they wrap back to zero
	subs	\rd, \rd, #0x20			@ then move the low field down by 32
	bpl	1b				@ and repeat until it goes negative
	.endm
74 | |
75 | .text |
76 | |
77 | /* |
78 | * cpu_xsc3_proc_init() |
79 | * |
80 | * Nothing too exciting at the moment |
81 | */ |
82 | ENTRY(cpu_xsc3_proc_init) |
83 | ret lr |
84 | |
/*
 * cpu_xsc3_proc_fin()
 *
 * Turn the caches off by clearing the I, Z, C and A bits of the CP15
 * control register.  Clobbers r0.
 */
ENTRY(cpu_xsc3_proc_fin)
	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
	bic	r0, r0, #0x1800			@ ...IZ...........
	bic	r0, r0, #0x0006			@ .............CA.
	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
	ret	lr
ENDPROC(cpu_xsc3_proc_fin)
94 | |
/*
 * cpu_xsc3_reset(loc)
 *
 * Perform a soft reset of the system.  Put the CPU into the same state
 * as it would be in after a reset (SVC mode with IRQ and FIQ masked,
 * caches and MMU off) and branch to what would be the reset vector.
 *
 * loc (r0): location to jump to for soft reset
 *
 * Clobbers r1.  The routine lives in .idmap.text; the alignment
 * directive is placed inside the .pushsection so that it aligns this
 * routine to 32 bytes instead of merely padding .text.
 *
 * NOTE(review): ip is passed to the invalidate operations without being
 * initialised here; presumably the core ignores the register value for
 * these operations - confirm against the core manual.
 */
	.pushsection	.idmap.text, "ax"
	.align	5
ENTRY(cpu_xsc3_reset)
	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r1			@ reset CPSR
	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
	bic	r1, r1, #0x3900			@ ..VIZ..S........
	bic	r1, r1, #0x0086			@ ........B....CA.
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
	bic	r1, r1, #0x0001			@ ...............M
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	@ CAUTION: MMU turned off from this point.  We count on the pipeline
	@ already containing those two last instructions to survive.
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	ret	r0
ENDPROC(cpu_xsc3_reset)
	.popsection
122 | |
/*
 * cpu_xsc3_do_idle()
 *
 * Cause the processor to idle.
 *
 * For now we do nothing but go to idle mode for every case.
 *
 * XScale supports clock switching, but using idle mode support
 * allows external hardware to react to system state changes.
 *
 * Clobbers r0.
 */
	.align	5

ENTRY(cpu_xsc3_do_idle)
	mov	r0, #1
	mcr	p14, 0, r0, c7, c0, 0		@ go to idle
	ret	lr
ENDPROC(cpu_xsc3_do_idle)
139 | |
/* ================================= CACHE ================================ */

/*
 *	flush_icache_all()
 *
 *	Unconditionally invalidate the entire I cache.  Clobbers r0.
 */
ENTRY(xsc3_flush_icache_all)
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
	ret	lr
ENDPROC(xsc3_flush_icache_all)
152 | |
/*
 *	flush_user_cache_all()
 *
 *	Flush all cache entries in a particular address space.  There is
 *	no per-address-space operation here, so this entry point simply
 *	falls through to the whole-cache flush below.
 */
ENTRY(xsc3_flush_user_cache_all)
	/* FALLTHROUGH */

/*
 *	flush_kern_cache_all()
 *
 *	Clean and invalidate the entire L1 D cache, then invalidate the
 *	L1 I cache and BTB.  Clobbers r0, r2, ip and the flags.
 *
 *	__flush_whole_cache is also branched to by
 *	xsc3_flush_user_cache_range for large ranges.  It expects
 *	r2 = vm_flags and ip = 0, and skips the I-side work when VM_EXEC
 *	is clear in r2.
 */
ENTRY(xsc3_flush_kern_cache_all)
	mov	r2, #VM_EXEC
	mov	ip, #0
__flush_whole_cache:
	clean_d_cache r0, r1
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	ret	lr
ENDPROC(xsc3_flush_user_cache_all)
ENDPROC(xsc3_flush_kern_cache_all)
177 | |
/*
 *	flush_user_cache_range(start, end, vm_flags)
 *
 *	Clean and invalidate the L1 D lines covering [start, end) and,
 *	when VM_EXEC is set in vm_flags, also invalidate the matching
 *	L1 I lines and the BTB.  A range of MAX_AREA_SIZE bytes or more
 *	is handled by flushing the whole cache instead.
 *
 *	- start    - start address (may not be aligned)
 *	- end      - end address (exclusive, may not be aligned)
 *	- vm_flags - flags of the mapping; only VM_EXEC is examined
 *
 *	Clobbers r0, r3, ip and the flags.
 */
	.align	5
ENTRY(xsc3_flush_user_cache_range)
	mov	ip, #0
	sub	r3, r1, r0			@ calculate total size
	cmp	r3, #MAX_AREA_SIZE
	bhs	__flush_whole_cache

	@ Walk from the start of the first line.  Stepping an unaligned
	@ address by CACHELINESIZE can jump past 'end' without ever
	@ touching the final, partially covered line.
	bic	r0, r0, #CACHELINESIZE - 1
1:	tst	r2, #VM_EXEC
	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate L1 I line
	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	tst	r2, #VM_EXEC			@ cmp above overwrote the flags
	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	ret	lr
ENDPROC(xsc3_flush_user_cache_range)
206 | |
/*
 *	coherent_kern_range(start, end)
 *	coherent_user_range(start, end)
 *
 *	Ensure coherency between the I cache and the D cache in the
 *	region described by start and end: clean the L1 D lines of the
 *	range, then invalidate the whole L1 I cache and BTB.  If you
 *	have non-snooping Harvard caches, you need to implement this
 *	function.
 *
 *	- start  - virtual start address
 *	- end    - virtual end address
 *
 *	Returns with r0 = 0.  Clobbers the flags.
 *
 *	Note: single I-cache line invalidation isn't used here since
 *	it also trashes the mini I-cache used by JTAG debuggers.
 */
ENTRY(xsc3_coherent_kern_range)
	/* FALLTHROUGH */
ENTRY(xsc3_coherent_user_range)
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	ret	lr
ENDPROC(xsc3_coherent_kern_range)
ENDPROC(xsc3_coherent_user_range)
233 | |
/*
 *	flush_kern_dcache_area(void *addr, size_t size)
 *
 *	Ensure no D cache aliasing occurs, either with itself or
 *	the I cache: clean and invalidate the L1 D lines covering
 *	[addr, addr + size), then invalidate the whole L1 I cache and
 *	BTB.
 *
 *	- addr	- kernel address
 *	- size	- region size
 *
 *	Clobbers r0, r1 and the flags.
 */
ENTRY(xsc3_flush_kern_dcache_area)
	add	r1, r0, r1			@ r1 = end address
	@ Start from the beginning of the first line so that an unaligned
	@ addr cannot make the loop step over the last partial line.
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	ret	lr
ENDPROC(xsc3_flush_kern_dcache_area)
254 | |
/*
 *	dma_inv_range(start, end)
 *
 *	Invalidate (discard) the specified virtual address range
 *	without writing its entries back.  The exception is a line
 *	that 'start' or 'end' only partly covers: such a line is
 *	cleaned first.
 *
 *	- start  - virtual start address
 *	- end    - virtual end address
 *
 *	Clobbers r0 and the flags.
 */
xsc3_dma_inv_range:
	tst	r0, #CACHELINESIZE - 1		@ start in the middle of a line?
	bic	r0, r0, #CACHELINESIZE - 1	@ (bic leaves the flags alone)
	mcrne	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	tst	r1, #CACHELINESIZE - 1		@ end in the middle of a line?
	mcrne	p15, 0, r1, c7, c10, 1		@ clean L1 D line
1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	ret	lr
278 | |
/*
 *	dma_clean_range(start, end)
 *
 *	Clean (write back) every L1 D line of the specified virtual
 *	address range, working from the line that contains 'start'.
 *
 *	- start  - virtual start address
 *	- end    - virtual end address
 *
 *	Clobbers r0 and the flags.
 */
xsc3_dma_clean_range:
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	ret	lr
295 | |
/*
 *	dma_flush_range(start, end)
 *
 *	Clean and invalidate the specified virtual address range,
 *	working from the line that contains 'start'.
 *
 *	- start  - virtual start address
 *	- end    - virtual end address
 *
 *	Clobbers r0 and the flags.
 */
ENTRY(xsc3_dma_flush_range)
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	ret	lr
ENDPROC(xsc3_dma_flush_range)
312 | |
/*
 *	dma_map_area(start, size, dir)
 *
 *	Turn (start, size) into (start, end) and tail-call the range
 *	operation selected by the DMA direction:
 *
 *	  dir == DMA_TO_DEVICE	-> xsc3_dma_clean_range
 *	  dir >  DMA_TO_DEVICE	-> xsc3_dma_inv_range
 *	  dir <  DMA_TO_DEVICE	-> xsc3_dma_flush_range
 *
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(xsc3_dma_map_area)
	add	r1, r1, r0			@ r1 = end address
	cmp	r2, #DMA_TO_DEVICE
	beq	xsc3_dma_clean_range
	bhi	xsc3_dma_inv_range		@ Z is clear here, so this is bcs
	b	xsc3_dma_flush_range
ENDPROC(xsc3_dma_map_area)
326 | |
/*
 *	dma_unmap_area(start, size, dir)
 *
 *	Nothing is done on unmap: the routine returns immediately and
 *	leaves every register untouched.
 *
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(xsc3_dma_unmap_area)
	ret	lr
ENDPROC(xsc3_dma_unmap_area)
336 | |
337 | .globl xsc3_flush_kern_cache_louis |
338 | .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all |
339 | |
340 | @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) |
341 | define_cache_functions xsc3 |
342 | |
/*
 *	cpu_xsc3_dcache_clean_area(addr, size)
 *
 *	Clean (write back) L1 D lines starting at the one holding addr,
 *	one line per CACHELINESIZE bytes of size.  At least one line is
 *	always cleaned.
 *
 *	- addr	- start address (r0)
 *	- size	- size of region in bytes (r1)
 *
 *	Clobbers r0, r1 and the flags.
 */
ENTRY(cpu_xsc3_dcache_clean_area)
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	subs	r1, r1, #CACHELINESIZE
	bhi	1b
	ret	lr
ENDPROC(cpu_xsc3_dcache_clean_area)
349 | |
/* =============================== PageTable ============================== */

/*
 * cpu_xsc3_switch_mm(pgd)
 *
 * Set the translation base pointer to be as described by pgd.  Before
 * the switch the whole L1 D cache is cleaned and invalidated and the
 * L1 I cache and BTB are invalidated; afterwards the TLBs are
 * invalidated.
 *
 * pgd (r0): new page tables
 *
 * Clobbers r0, r1, ip and the flags.  Returns through cpwait_ret.
 *
 * NOTE(review): ip is passed to the cache and TLB operations without
 * being initialised here; presumably the core ignores the register
 * value for these operations - confirm against the core manual.
 */
	.align	5
ENTRY(cpu_xsc3_switch_mm)
	clean_d_cache r1, r2
	mcr	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
	orr	r0, r0, #0x18			@ cache the page table in L2
	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	cpwait_ret lr, ip
ENDPROC(cpu_xsc3_switch_mm)
369 | |
/*
 * Hardware memory-type bits for each Linux memory type, used by
 * cpu_xsc3_set_pte_ext below.  There are sixteen one-word entries; the
 * PTE's masked memory-type field is used directly as the byte offset
 * into this table.
 */
cpu_xsc3_mt_table:
	.long	0x00						@ L_PTE_MT_UNCACHED
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE			@ L_PTE_MT_WRITETHROUGH
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
	.long	0x00						@ unused
	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC (not present?)
	.long	0x00						@ unused
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
	.long	0x00						@ unused
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
	.long	0x00						@ unused
	.long	0x00						@ unused
	.long	0x00						@ unused

/*
 * cpu_xsc3_set_pte_ext(ptep, pte, ext)
 *
 * Set a PTE and flush it out.
 *
 * The shared prologue and epilogue macros come from proc-macros.S.  In
 * between, this routine replaces the C and B bits of the hardware PTE
 * in r2 with the cpu_xsc3_mt_table entry for the Linux memory type,
 * and sets PTE_EXT_COHERENT when the Linux PTE in r1 has L_PTE_SHARED.
 *
 * Clobbers r1, r2, ip and the flags, plus whatever the prologue and
 * epilogue macros use.
 */
	.align	5
ENTRY(cpu_xsc3_set_pte_ext)
	xscale_set_pte_ext_prologue

	tst	r1, #L_PTE_SHARED		@ shared?
	and	r1, r1, #L_PTE_MT_MASK		@ r1 = byte offset into the table
	adr	ip, cpu_xsc3_mt_table
	ldr	ip, [ip, r1]
	orrne	r2, r2, #PTE_EXT_COHERENT	@ interlock: mask in coherent bit
						@ (flags are still those of the tst)
	bic	r2, r2, #0x0c			@ clear old C,B bits
	orr	r2, r2, ip

	xscale_set_pte_ext_epilogue
	ret	lr
ENDPROC(cpu_xsc3_set_pte_ext)

	.ltorg
	.align
410 | |
411 | .globl cpu_xsc3_suspend_size |
412 | .equ cpu_xsc3_suspend_size, 4 * 6 |
413 | #ifdef CONFIG_ARM_CPU_SUSPEND |
414 | ENTRY(cpu_xsc3_do_suspend) |
415 | stmfd sp!, {r4 - r9, lr} |
416 | mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode |
417 | mrc p15, 0, r5, c15, c1, 0 @ CP access reg |
418 | mrc p15, 0, r6, c13, c0, 0 @ PID |
419 | mrc p15, 0, r7, c3, c0, 0 @ domain ID |
420 | mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg |
421 | mrc p15, 0, r9, c1, c0, 0 @ control reg |
422 | bic r4, r4, #2 @ clear frequency change bit |
423 | stmia r0, {r4 - r9} @ store cp regs |
424 | ldmia sp!, {r4 - r9, pc} |
425 | ENDPROC(cpu_xsc3_do_suspend) |
426 | |
427 | ENTRY(cpu_xsc3_do_resume) |
428 | ldmia r0, {r4 - r9} @ load cp regs |
429 | mov ip, #0 |
430 | mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB |
431 | mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer |
432 | mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer |
433 | mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs |
434 | mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. |
435 | mcr p15, 0, r5, c15, c1, 0 @ CP access reg |
436 | mcr p15, 0, r6, c13, c0, 0 @ PID |
437 | mcr p15, 0, r7, c3, c0, 0 @ domain ID |
438 | orr r1, r1, #0x18 @ cache the page table in L2 |
439 | mcr p15, 0, r1, c2, c0, 0 @ translation table base addr |
440 | mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg |
441 | mov r0, r9 @ control register |
442 | b cpu_resume_mmu |
443 | ENDPROC(cpu_xsc3_do_resume) |
444 | #endif |
445 | |
/*
 * __xsc3_setup
 *
 * Early CPU setup: mask IRQ and FIQ in SVC mode, invalidate the L1
 * caches, BTB and TLBs, load the page table pointer from r4 (marked
 * L2-cacheable), grant access to CP6 and enable L2 for LLR pages.
 *
 * Returns in r0 the value for the CP15 control register: the current
 * value with the first word of xsc3_crval cleared and the second word
 * set.  The control register itself is not written here.
 *
 * Clobbers r0, r4, r5, r6 and the flags.
 *
 * NOTE(review): ip is passed to the invalidate operations without being
 * initialised here; presumably the core ignores the register value for
 * these operations - confirm against the core manual.
 */
	.type	__xsc3_setup, %function
__xsc3_setup:
	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r0
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	orr	r4, r4, #0x18			@ cache the page table in L2
	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer

	mov	r0, #1 << 6			@ cp6 access for early sched_clock
	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register

	mrc	p15, 0, r0, c1, c0, 1		@ get auxiliary control reg
	and	r0, r0, #2			@ preserve the P bit setting
	orr	r0, r0, #(1 << 10)		@ enable L2 for LLR cache
	mcr	p15, 0, r0, c1, c0, 1		@ set auxiliary control reg

	adr	r5, xsc3_crval
	ldmia	r5, {r5, r6}			@ r5 = bits to clear, r6 = bits to set

#ifdef CONFIG_CACHE_XSC3L2
	mrc	p15, 1, r0, c0, c0, 1		@ get L2 present information
	ands	r0, r0, #0xf8
	orrne	r6, r6, #(1 << 26)		@ enable L2 if present
#endif

	mrc	p15, 0, r0, c1, c0, 0		@ get control register
	bic	r0, r0, r5			@ ..V. ..R. .... ..A.
	orr	r0, r0, r6			@ ..VI Z..S .... .C.M (mmu)
						@ ...I Z..S .... .... (uc)
	ret	lr

	.size	__xsc3_setup, . - __xsc3_setup

	@ Control register masks read by __xsc3_setup (crval is defined in
	@ proc-macros.S).
	.type	xsc3_crval, %object
xsc3_crval:
	crval	clear=0x04002202, mmuset=0x00003905, ucset=0x00001900
485 | |
486 | __INITDATA |
487 | |
488 | @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) |
489 | define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 |
490 | |
491 | .section ".rodata" |
492 | |
493 | string cpu_arch_name, "armv5te" |
494 | string cpu_elf_name, "v5" |
495 | string cpu_xsc3_name, "XScale-V3 based processor" |
496 | |
497 | .align |
498 | |
499 | .section ".proc.info.init" , "a" |
500 | |
501 | .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req |
502 | .type __\name\()_proc_info,#object |
503 | __\name\()_proc_info: |
504 | .long \cpu_val |
505 | .long \cpu_mask |
506 | .long PMD_TYPE_SECT | \ |
507 | PMD_SECT_BUFFERABLE | \ |
508 | PMD_SECT_CACHEABLE | \ |
509 | PMD_SECT_AP_WRITE | \ |
510 | PMD_SECT_AP_READ |
511 | .long PMD_TYPE_SECT | \ |
512 | PMD_SECT_AP_WRITE | \ |
513 | PMD_SECT_AP_READ |
514 | initfn __xsc3_setup, __\name\()_proc_info |
515 | .long cpu_arch_name |
516 | .long cpu_elf_name |
517 | .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP |
518 | .long cpu_xsc3_name |
519 | .long xsc3_processor_functions |
520 | .long v4wbi_tlb_fns |
521 | .long xsc3_mc_user_fns |
522 | .long xsc3_cache_fns |
523 | .size __\name\()_proc_info, . - __\name\()_proc_info |
524 | .endm |
525 | |
526 | xsc3_proc_info xsc3, 0x69056000, 0xffffe000 |
527 | |
528 | /* Note: PXA935 changed its implementor ID from Intel to Marvell */ |
529 | xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000 |
530 | |