1 | /* |
2 | * arch/sh/mm/cache-sh4.c |
3 | * |
4 | * Copyright (C) 1999, 2000, 2002 Niibe Yutaka |
5 | * Copyright (C) 2001 - 2009 Paul Mundt |
6 | * Copyright (C) 2003 Richard Curnow |
7 | * Copyright (c) 2007 STMicroelectronics (R&D) Ltd. |
8 | * |
9 | * This file is subject to the terms and conditions of the GNU General Public |
10 | * License. See the file "COPYING" in the main directory of this archive |
11 | * for more details. |
12 | */ |
13 | #include <linux/init.h> |
14 | #include <linux/mm.h> |
15 | #include <linux/io.h> |
16 | #include <linux/mutex.h> |
17 | #include <linux/fs.h> |
18 | #include <linux/highmem.h> |
19 | #include <linux/pagemap.h> |
20 | #include <asm/mmu_context.h> |
21 | #include <asm/cache_insns.h> |
22 | #include <asm/cacheflush.h> |
23 | |
24 | /* |
25 | * The maximum number of pages we support up to when doing ranged dcache |
26 | * flushing. Anything exceeding this will simply flush the dcache in its |
27 | * entirety. |
28 | */ |
29 | #define MAX_ICACHE_PAGES 32 |
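/*
 * Example: with a 4 KiB PAGE_SIZE this caps the selective path at
 * 128 KiB; larger ranges fall back to a full cache flush.
 */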
30 | |
31 | static void __flush_cache_one(unsigned long addr, unsigned long phys, |
32 | unsigned long exec_offset); |
33 | |
34 | /* |
35 | * Write back the range of D-cache, and purge the I-cache. |
36 | * |
 * Called from kernel/module.c:sys_init_module, the a.out loader,
 * signal handler setup, and the kprobes code.
39 | */ |
40 | static void sh4_flush_icache_range(void *args) |
41 | { |
42 | struct flusher_data *data = args; |
43 | unsigned long start, end; |
44 | unsigned long flags, v; |
45 | int i; |
46 | |
47 | start = data->addr1; |
48 | end = data->addr2; |
49 | |
50 | /* If there are too many pages then just blow away the caches */ |
51 | if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) { |
52 | local_flush_cache_all(NULL); |
53 | return; |
54 | } |
55 | |
56 | /* |
57 | * Selectively flush d-cache then invalidate the i-cache. |
58 | * This is inefficient, so only use this for small ranges. |
59 | */ |
60 | start &= ~(L1_CACHE_BYTES-1); |
61 | end += L1_CACHE_BYTES-1; |
62 | end &= ~(L1_CACHE_BYTES-1); |
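	/*
	 * Example, assuming the SH-4 32-byte line (L1_CACHE_BYTES == 32):
	 * start 0x1004 rounds down to 0x1000 and end 0x1041 rounds up to
	 * 0x1060, covering every line the range touches exactly once.
	 */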
63 | |
64 | local_irq_save(flags); |
65 | jump_to_uncached(); |
66 | |
67 | for (v = start; v < end; v += L1_CACHE_BYTES) { |
68 | unsigned long icacheaddr; |
69 | int j, n; |
70 | |
71 | __ocbwb(v); |
72 | |
73 | icacheaddr = CACHE_IC_ADDRESS_ARRAY | (v & |
74 | cpu_data->icache.entry_mask); |
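		/*
		 * v's set-index bits (icache.entry_mask) select the address
		 * array entry; the loops below then step through every way
		 * (way_incr) and alias page to clear that set everywhere.
		 */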
75 | |
76 | /* Clear i-cache line valid-bit */ |
77 | n = boot_cpu_data.icache.n_aliases; |
78 | for (i = 0; i < cpu_data->icache.ways; i++) { |
79 | for (j = 0; j < n; j++) |
				__raw_writel(0, icacheaddr + (j * PAGE_SIZE));
81 | icacheaddr += cpu_data->icache.way_incr; |
82 | } |
83 | } |
84 | |
85 | back_to_cached(); |
86 | local_irq_restore(flags); |
87 | } |
88 | |
89 | static inline void flush_cache_one(unsigned long start, unsigned long phys) |
90 | { |
91 | unsigned long flags, exec_offset = 0; |
92 | |
93 | /* |
94 | * All types of SH-4 require PC to be uncached to operate on the I-cache. |
95 | * Some types of SH-4 require PC to be uncached to operate on the D-cache. |
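	 *
	 * A start address below CACHE_OC_ADDRESS_ARRAY targets the I-cache
	 * address array, hence the second test below.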
96 | */ |
97 | if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) || |
98 | (start < CACHE_OC_ADDRESS_ARRAY)) |
99 | exec_offset = cached_to_uncached; |
100 | |
101 | local_irq_save(flags); |
	__flush_cache_one(start, phys, exec_offset);
103 | local_irq_restore(flags); |
104 | } |
105 | |
106 | /* |
107 | * Write back & invalidate the D-cache of the page. |
108 | * (To avoid "alias" issues) |
109 | */ |
110 | static void sh4_flush_dcache_folio(void *arg) |
111 | { |
112 | struct folio *folio = arg; |
113 | #ifndef CONFIG_SMP |
114 | struct address_space *mapping = folio_flush_mapping(folio); |
115 | |
116 | if (mapping && !mapping_mapped(mapping)) |
117 | clear_bit(PG_dcache_clean, &folio->flags); |
118 | else |
119 | #endif |
120 | { |
121 | unsigned long pfn = folio_pfn(folio); |
122 | unsigned long addr = (unsigned long)folio_address(folio); |
123 | unsigned int i, nr = folio_nr_pages(folio); |
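
		/*
		 * Flush via the OC address array entry of the same cache
		 * colour as the kernel mapping of each page; addr &
		 * shm_align_mask selects the alias-colour bits.
		 */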
124 | |
125 | for (i = 0; i < nr; i++) { |
126 | flush_cache_one(CACHE_OC_ADDRESS_ARRAY | |
127 | (addr & shm_align_mask), |
128 | pfn * PAGE_SIZE); |
129 | addr += PAGE_SIZE; |
130 | pfn++; |
131 | } |
132 | } |
133 | |
134 | wmb(); |
135 | } |
136 | |
137 | /* TODO: Selective icache invalidation through IC address array.. */ |
138 | static void flush_icache_all(void) |
139 | { |
140 | unsigned long flags, ccr; |
141 | |
142 | local_irq_save(flags); |
143 | jump_to_uncached(); |
144 | |
145 | /* Flush I-cache */ |
	ccr = __raw_readl(SH_CCR);
	ccr |= CCR_CACHE_ICI;
	__raw_writel(ccr, SH_CCR);
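	/*
	 * CCR.ICI is an invalidate trigger: writing 1 invalidates the
	 * entire I-cache, and the bit reads back as 0 afterwards.
	 */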
149 | |
150 | /* |
151 | * back_to_cached() will take care of the barrier for us, don't add |
152 | * another one! |
153 | */ |
154 | |
155 | back_to_cached(); |
156 | local_irq_restore(flags); |
157 | } |
158 | |
159 | static void flush_dcache_all(void) |
160 | { |
161 | unsigned long addr, end_addr, entry_offset; |
162 | |
163 | end_addr = CACHE_OC_ADDRESS_ARRAY + |
164 | (current_cpu_data.dcache.sets << |
165 | current_cpu_data.dcache.entry_shift) * |
166 | current_cpu_data.dcache.ways; |
167 | |
168 | entry_offset = 1 << current_cpu_data.dcache.entry_shift; |
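	/*
	 * Example: a 16 KiB way with 32-byte lines (entry_shift == 5) has
	 * 512 set entries, so the 8-way unrolled loop below makes 64 trips
	 * per way; the actual geometry varies by CPU model.
	 */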
169 | |
170 | for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) { |
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
179 | } |
180 | } |
181 | |
182 | static void sh4_flush_cache_all(void *unused) |
183 | { |
184 | flush_dcache_all(); |
185 | flush_icache_all(); |
186 | } |
187 | |
188 | /* |
189 | * Note : (RPC) since the caches are physically tagged, the only point |
190 | * of flush_cache_mm for SH-4 is to get rid of aliases from the |
191 | * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that |
192 | * lines can stay resident so long as the virtual address they were |
193 | * accessed with (hence cache set) is in accord with the physical |
194 | * address (i.e. tag). It's no different here. |
195 | * |
196 | * Caller takes mm->mmap_lock. |
197 | */ |
198 | static void sh4_flush_cache_mm(void *arg) |
199 | { |
200 | struct mm_struct *mm = arg; |
201 | |
202 | if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT) |
203 | return; |
204 | |
205 | flush_dcache_all(); |
206 | } |
207 | |
208 | /* |
209 | * Write back and invalidate I/D-caches for the page. |
210 | * |
211 | * ADDR: Virtual Address (U0 address) |
212 | * PFN: Physical page number |
213 | */ |
214 | static void sh4_flush_cache_page(void *args) |
215 | { |
216 | struct flusher_data *data = args; |
217 | struct vm_area_struct *vma; |
218 | struct page *page; |
219 | unsigned long address, pfn, phys; |
220 | int map_coherent = 0; |
221 | pmd_t *pmd; |
222 | pte_t *pte; |
223 | void *vaddr; |
224 | |
225 | vma = data->vma; |
226 | address = data->addr1 & PAGE_MASK; |
227 | pfn = data->addr2; |
228 | phys = pfn << PAGE_SHIFT; |
229 | page = pfn_to_page(pfn); |
230 | |
231 | if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT) |
232 | return; |
233 | |
	pmd = pmd_off(vma->vm_mm, address);
235 | pte = pte_offset_kernel(pmd, address); |
236 | |
237 | /* If the page isn't present, there is nothing to do here. */ |
	if (!(pte_val(*pte) & _PAGE_PRESENT))
239 | return; |
240 | |
	if (vma->vm_mm == current->active_mm)
242 | vaddr = NULL; |
243 | else { |
244 | /* |
245 | * Use kmap_coherent or kmap_atomic to do flushes for |
246 | * another ASID than the current one. |
247 | */ |
248 | map_coherent = (current_cpu_data.dcache.n_aliases && |
249 | test_bit(PG_dcache_clean, &page->flags) && |
250 | page_mapcount(page)); |
251 | if (map_coherent) |
252 | vaddr = kmap_coherent(page, address); |
253 | else |
254 | vaddr = kmap_atomic(page); |
255 | |
256 | address = (unsigned long)vaddr; |
257 | } |
258 | |
259 | flush_cache_one(CACHE_OC_ADDRESS_ARRAY | |
260 | (address & shm_align_mask), phys); |
261 | |
262 | if (vma->vm_flags & VM_EXEC) |
263 | flush_icache_all(); |
264 | |
265 | if (vaddr) { |
266 | if (map_coherent) |
267 | kunmap_coherent(vaddr); |
268 | else |
269 | kunmap_atomic(vaddr); |
270 | } |
271 | } |
272 | |
273 | /* |
274 | * Write back and invalidate D-caches. |
275 | * |
276 | * START, END: Virtual Address (U0 address) |
277 | * |
278 | * NOTE: We need to flush the _physical_ page entry. |
279 | * Flushing the cache lines for U0 only isn't enough. |
280 | * We need to flush for P1 too, which may contain aliases. |
281 | */ |
282 | static void sh4_flush_cache_range(void *args) |
283 | { |
284 | struct flusher_data *data = args; |
285 | struct vm_area_struct *vma; |
286 | unsigned long start, end; |
287 | |
288 | vma = data->vma; |
289 | start = data->addr1; |
290 | end = data->addr2; |
291 | |
292 | if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT) |
293 | return; |
294 | |
295 | /* |
296 | * If cache is only 4k-per-way, there are never any 'aliases'. Since |
297 | * the cache is physically tagged, the data can just be left in there. |
298 | */ |
299 | if (boot_cpu_data.dcache.n_aliases == 0) |
300 | return; |
301 | |
302 | flush_dcache_all(); |
303 | |
304 | if (vma->vm_flags & VM_EXEC) |
305 | flush_icache_all(); |
306 | } |
307 | |
308 | /** |
309 | * __flush_cache_one |
310 | * |
311 | * @addr: address in memory mapped cache array |
312 | * @phys: P1 address to flush (has to match tags if addr has 'A' bit |
313 | * set i.e. associative write) |
314 | * @exec_offset: set to 0x20000000 if flush has to be executed from P2 |
315 | * region else 0x0 |
316 | * |
317 | * The offset into the cache array implied by 'addr' selects the |
318 | * 'colour' of the virtual address range that will be flushed. The |
319 | * operation (purge/write-back) is selected by the lower 2 bits of |
320 | * 'phys'. |
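 *
 * The callers here pass a page-aligned phys, so the data written has
 * U = V = 0: each line is written back if dirty and left invalid,
 * i.e. a purge.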
321 | */ |
322 | static void __flush_cache_one(unsigned long addr, unsigned long phys, |
323 | unsigned long exec_offset) |
324 | { |
325 | int way_count; |
326 | unsigned long base_addr = addr; |
327 | struct cache_info *dcache; |
328 | unsigned long way_incr; |
329 | unsigned long a, ea, p; |
330 | unsigned long temp_pc; |
331 | |
332 | dcache = &boot_cpu_data.dcache; |
333 | /* Write this way for better assembly. */ |
334 | way_count = dcache->ways; |
335 | way_incr = dcache->way_incr; |
336 | |
337 | /* |
338 | * Apply exec_offset (i.e. branch to P2 if required.). |
339 | * |
340 | * FIXME: |
341 | * |
342 | * If I write "=r" for the (temp_pc), it puts this in r6 hence |
343 | * trashing exec_offset before it's been added on - why? Hence |
344 | * "=&r" as a 'workaround' |
345 | */ |
346 | asm volatile("mov.l 1f, %0\n\t" |
347 | "add %1, %0\n\t" |
348 | "jmp @%0\n\t" |
349 | "nop\n\t" |
350 | ".balign 4\n\t" |
351 | "1: .long 2f\n\t" |
352 | "2:\n" : "=&r" (temp_pc) : "r" (exec_offset)); |
353 | |
354 | /* |
355 | * We know there will be >=1 iteration, so write as do-while to avoid |
 * pointless head-of-loop check for 0 iterations.
357 | */ |
358 | do { |
359 | ea = base_addr + PAGE_SIZE; |
360 | a = base_addr; |
361 | p = phys; |
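
		/*
		 * Two address array writes per 64-byte step: with 32-byte
		 * lines this touches every line in the page at half the
		 * loop overhead.
		 */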
362 | |
363 | do { |
364 | *(volatile unsigned long *)a = p; |
365 | /* |
366 | * Next line: intentionally not p+32, saves an add, p |
367 | * will do since only the cache tag bits need to |
368 | * match. |
369 | */ |
370 | *(volatile unsigned long *)(a+32) = p; |
371 | a += 64; |
372 | p += 64; |
373 | } while (a < ea); |
374 | |
375 | base_addr += way_incr; |
376 | } while (--way_count != 0); |
377 | } |
378 | |
379 | extern void __weak sh4__flush_region_init(void); |
380 | |
381 | /* |
382 | * SH-4 has virtually indexed and physically tagged cache. |
383 | */ |
384 | void __init sh4_cache_init(void) |
385 | { |
	printk("PVR=%08x CVR=%08x PRR=%08x\n",
387 | __raw_readl(CCN_PVR), |
388 | __raw_readl(CCN_CVR), |
389 | __raw_readl(CCN_PRR)); |
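
	/*
	 * Install the SH-4 implementations behind the generic local_flush_*
	 * hooks that the common sh cache code dispatches through.
	 */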
390 | |
391 | local_flush_icache_range = sh4_flush_icache_range; |
392 | local_flush_dcache_folio = sh4_flush_dcache_folio; |
393 | local_flush_cache_all = sh4_flush_cache_all; |
394 | local_flush_cache_mm = sh4_flush_cache_mm; |
395 | local_flush_cache_dup_mm = sh4_flush_cache_mm; |
396 | local_flush_cache_page = sh4_flush_cache_page; |
397 | local_flush_cache_range = sh4_flush_cache_range; |
398 | |
399 | sh4__flush_region_init(); |
400 | } |
401 | |