1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Broadcom Brahma-B15 CPU read-ahead cache management functions |
4 | * |
5 | * Copyright (C) 2015-2016 Broadcom |
6 | */ |
7 | |
8 | #include <linux/err.h> |
9 | #include <linux/spinlock.h> |
10 | #include <linux/io.h> |
11 | #include <linux/bitops.h> |
12 | #include <linux/of_address.h> |
13 | #include <linux/notifier.h> |
14 | #include <linux/cpu.h> |
15 | #include <linux/syscore_ops.h> |
16 | #include <linux/reboot.h> |
17 | |
18 | #include <asm/cacheflush.h> |
19 | #include <asm/hardware/cache-b15-rac.h> |
20 | |
21 | extern void v7_flush_kern_cache_all(void); |
22 | |
/* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */
#define RAC_CONFIG0_REG		(0x78)
/* Field positions inside one CPU's slice of RAC_CONFIG0_REG */
#define  RACENPREF_MASK		(0x3)
#define  RACPREFINST_SHIFT	(0)
#define  RACENINST_SHIFT	(2)
#define  RACPREFDATA_SHIFT	(4)
#define  RACENDATA_SHIFT	(6)
/* Consecutive CPUs' slices are RAC_CPU_SHIFT bits apart */
#define  RAC_CPU_SHIFT		(8)
#define  RACCFG_MASK		(0xff)
#define RAC_CONFIG1_REG		(0x7c)
/* Brahma-B15 is a quad-core only design */
#define B15_RAC_FLUSH_REG	(0x80)
/* Brahma-B53 is an octo-core design */
#define B53_RAC_FLUSH_REG	(0x84)
/* Written to start a flush; polled until the hardware reads it back as 0 */
#define  FLUSH_RAC		(1 << 0)

/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */
#define RAC_DATA_INST_EN_MASK	((1 << RACPREFINST_SHIFT) |		\
				 (RACENPREF_MASK << RACENINST_SHIFT) |	\
				 (1 << RACPREFDATA_SHIFT) |		\
				 (RACENPREF_MASK << RACENDATA_SHIFT))

/* Bit numbers used in b15_rac_flags */
#define RAC_ENABLED		0
/* Special state where we want to bypass the spinlock and call directly
 * into the v7 cache maintenance operations during suspend/resume
 */
#define RAC_SUSPENDED		1
50 | |
51 | static void __iomem *b15_rac_base; |
52 | static DEFINE_SPINLOCK(rac_lock); |
53 | |
54 | static u32 rac_config0_reg; |
55 | static u32 rac_flush_offset; |
56 | |
57 | /* Initialization flag to avoid checking for b15_rac_base, and to prevent |
58 | * multi-platform kernels from crashing here as well. |
59 | */ |
60 | static unsigned long b15_rac_flags; |
61 | |
62 | static inline u32 __b15_rac_disable(void) |
63 | { |
64 | u32 val = __raw_readl(addr: b15_rac_base + RAC_CONFIG0_REG); |
65 | __raw_writel(val: 0, addr: b15_rac_base + RAC_CONFIG0_REG); |
66 | dmb(); |
67 | return val; |
68 | } |
69 | |
70 | static inline void __b15_rac_flush(void) |
71 | { |
72 | u32 reg; |
73 | |
74 | __raw_writel(FLUSH_RAC, addr: b15_rac_base + rac_flush_offset); |
75 | do { |
76 | /* This dmb() is required to force the Bus Interface Unit |
77 | * to clean outstanding writes, and forces an idle cycle |
78 | * to be inserted. |
79 | */ |
80 | dmb(); |
81 | reg = __raw_readl(addr: b15_rac_base + rac_flush_offset); |
82 | } while (reg & FLUSH_RAC); |
83 | } |
84 | |
85 | static inline u32 b15_rac_disable_and_flush(void) |
86 | { |
87 | u32 reg; |
88 | |
89 | reg = __b15_rac_disable(); |
90 | __b15_rac_flush(); |
91 | return reg; |
92 | } |
93 | |
94 | static inline void __b15_rac_enable(u32 val) |
95 | { |
96 | __raw_writel(val, addr: b15_rac_base + RAC_CONFIG0_REG); |
97 | /* dsb() is required here to be consistent with __flush_icache_all() */ |
98 | dsb(); |
99 | } |
100 | |
/*
 * BUILD_RAC_CACHE_OP(name, bar) - generate b15_flush_<name>()
 *
 * The generated function wraps v7_flush_<name>():
 *  - when RAC_SUSPENDED is set, it calls v7_flush_<name>() followed by @bar
 *    without taking rac_lock;
 *  - otherwise, under rac_lock, it disables and flushes the RAC if RAC_ENABLED
 *    is set, runs v7_flush_<name>(), and then either restores the saved RAC
 *    configuration (RAC was enabled) or issues @bar (RAC was not enabled).
 */
#define BUILD_RAC_CACHE_OP(name, bar)				\
void b15_flush_##name(void)					\
{								\
	unsigned int rac_on;					\
	u32 saved = 0;						\
								\
	if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) {		\
		v7_flush_##name();				\
		bar;						\
		return;						\
	}							\
								\
	spin_lock(&rac_lock);					\
	rac_on = test_bit(RAC_ENABLED, &b15_rac_flags);		\
	if (rac_on)						\
		saved = b15_rac_disable_and_flush();		\
	v7_flush_##name();					\
	if (rac_on)						\
		__b15_rac_enable(saved);			\
	else							\
		bar;						\
	spin_unlock(&rac_lock);					\
}
124 | |
125 | #define nobarrier |
126 | |
127 | /* The readahead cache present in the Brahma-B15 CPU is a special piece of |
128 | * hardware after the integrated L2 cache of the B15 CPU complex whose purpose |
129 | * is to prefetch instruction and/or data with a line size of either 64 bytes |
130 | * or 256 bytes. The rationale is that the data-bus of the CPU interface is |
131 | * optimized for 256-bytes transactions, and enabling the readahead cache |
132 | * provides a significant performance boost we want it enabled (typically |
133 | * twice the performance for a memcpy benchmark application). |
134 | * |
135 | * The readahead cache is transparent for Modified Virtual Addresses |
136 | * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and |
137 | * DCCIMVAC. |
138 | * |
139 | * It is however not transparent for the following cache maintenance |
140 | * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU which is precisely |
141 | * what we are patching here with our BUILD_RAC_CACHE_OP here. |
142 | */ |
143 | BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier); |
144 | |
145 | static void b15_rac_enable(void) |
146 | { |
147 | unsigned int cpu; |
148 | u32 enable = 0; |
149 | |
150 | for_each_possible_cpu(cpu) |
151 | enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT)); |
152 | |
153 | b15_rac_disable_and_flush(); |
154 | __b15_rac_enable(val: enable); |
155 | } |
156 | |
157 | static int b15_rac_reboot_notifier(struct notifier_block *nb, |
158 | unsigned long action, |
159 | void *data) |
160 | { |
161 | /* During kexec, we are not yet migrated on the boot CPU, so we need to |
162 | * make sure we are SMP safe here. Once the RAC is disabled, flag it as |
163 | * suspended such that the hotplug notifier returns early. |
164 | */ |
165 | if (action == SYS_RESTART) { |
166 | spin_lock(lock: &rac_lock); |
167 | b15_rac_disable_and_flush(); |
168 | clear_bit(RAC_ENABLED, addr: &b15_rac_flags); |
169 | set_bit(RAC_SUSPENDED, addr: &b15_rac_flags); |
170 | spin_unlock(lock: &rac_lock); |
171 | } |
172 | |
173 | return NOTIFY_DONE; |
174 | } |
175 | |
176 | static struct notifier_block b15_rac_reboot_nb = { |
177 | .notifier_call = b15_rac_reboot_notifier, |
178 | }; |
179 | |
/* The CPU hotplug case is the most interesting one: we basically need to make
 * sure that the RAC is disabled for the entire system prior to having a CPU
 * die, in particular prior to this dying CPU having exited the coherency
 * domain.
 *
 * Once this CPU is marked dead, we can safely re-enable the RAC for the
 * remaining CPUs in the system which are still online.
 *
 * Offlining a CPU is the problematic case; onlining a CPU is not much of an
 * issue since the CPU and its cache-level hierarchy will start filling with
 * the RAC disabled, so L1 and L2 only.
 *
 * In this function, we should NOT have to verify any unsafe setting/condition:
 *
 * b15_rac_base:
 *   It is protected by the RAC_ENABLED flag, which is cleared by default and
 *   only set once the initialization procedure is done. b15_rac_base has been
 *   set by that time.
 *
 * RAC_ENABLED:
 *   There is a small timing window, in b15_rac_init(), between
 *     cpuhp_setup_state_*()
 *     ...
 *     set RAC_ENABLED
 *   However, there is no hotplug activity at that point of the Linux boot
 *   procedure.
 *
 * Since we have to disable the RAC for all cores, we keep the RAC on as long
 * as possible (disable it as late as possible) to gain the cache benefit.
 *
 * Thus, the dying/dead states are chosen here.
 *
 * We are choosing not to disable the RAC on a per-CPU basis here; if we did,
 * we would want to consider disabling it as early as possible to benefit the
 * other active CPUs.
 */
215 | |
216 | /* Running on the dying CPU */ |
217 | static int b15_rac_dying_cpu(unsigned int cpu) |
218 | { |
219 | /* During kexec/reboot, the RAC is disabled via the reboot notifier |
220 | * return early here. |
221 | */ |
222 | if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) |
223 | return 0; |
224 | |
225 | spin_lock(lock: &rac_lock); |
226 | |
227 | /* Indicate that we are starting a hotplug procedure */ |
228 | __clear_bit(RAC_ENABLED, &b15_rac_flags); |
229 | |
230 | /* Disable the readahead cache and save its value to a global */ |
231 | rac_config0_reg = b15_rac_disable_and_flush(); |
232 | |
233 | spin_unlock(lock: &rac_lock); |
234 | |
235 | return 0; |
236 | } |
237 | |
238 | /* Running on a non-dying CPU */ |
239 | static int b15_rac_dead_cpu(unsigned int cpu) |
240 | { |
241 | /* During kexec/reboot, the RAC is disabled via the reboot notifier |
242 | * return early here. |
243 | */ |
244 | if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) |
245 | return 0; |
246 | |
247 | spin_lock(lock: &rac_lock); |
248 | |
249 | /* And enable it */ |
250 | __b15_rac_enable(val: rac_config0_reg); |
251 | __set_bit(RAC_ENABLED, &b15_rac_flags); |
252 | |
253 | spin_unlock(lock: &rac_lock); |
254 | |
255 | return 0; |
256 | } |
257 | |
258 | static int b15_rac_suspend(void) |
259 | { |
260 | /* Suspend the read-ahead cache oeprations, forcing our cache |
261 | * implementation to fallback to the regular ARMv7 calls. |
262 | * |
263 | * We are guaranteed to be running on the boot CPU at this point and |
264 | * with every other CPU quiesced, so setting RAC_SUSPENDED is not racy |
265 | * here. |
266 | */ |
267 | rac_config0_reg = b15_rac_disable_and_flush(); |
268 | set_bit(RAC_SUSPENDED, addr: &b15_rac_flags); |
269 | |
270 | return 0; |
271 | } |
272 | |
273 | static void b15_rac_resume(void) |
274 | { |
275 | /* Coming out of a S3 suspend/resume cycle, the read-ahead cache |
276 | * register RAC_CONFIG0_REG will be restored to its default value, make |
277 | * sure we re-enable it and set the enable flag, we are also guaranteed |
278 | * to run on the boot CPU, so not racy again. |
279 | */ |
280 | __b15_rac_enable(val: rac_config0_reg); |
281 | clear_bit(RAC_SUSPENDED, addr: &b15_rac_flags); |
282 | } |
283 | |
284 | static struct syscore_ops b15_rac_syscore_ops = { |
285 | .suspend = b15_rac_suspend, |
286 | .resume = b15_rac_resume, |
287 | }; |
288 | |
289 | static int __init b15_rac_init(void) |
290 | { |
291 | struct device_node *dn, *cpu_dn; |
292 | int ret = 0, cpu; |
293 | u32 reg, en_mask = 0; |
294 | |
295 | dn = of_find_compatible_node(NULL, NULL, compat: "brcm,brcmstb-cpu-biu-ctrl" ); |
296 | if (!dn) |
297 | return -ENODEV; |
298 | |
299 | if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n" )) |
300 | goto out; |
301 | |
302 | b15_rac_base = of_iomap(node: dn, index: 0); |
303 | if (!b15_rac_base) { |
304 | pr_err("failed to remap BIU control base\n" ); |
305 | ret = -ENOMEM; |
306 | goto out; |
307 | } |
308 | |
309 | cpu_dn = of_get_cpu_node(cpu: 0, NULL); |
310 | if (!cpu_dn) { |
311 | ret = -ENODEV; |
312 | goto out; |
313 | } |
314 | |
315 | if (of_device_is_compatible(device: cpu_dn, "brcm,brahma-b15" )) |
316 | rac_flush_offset = B15_RAC_FLUSH_REG; |
317 | else if (of_device_is_compatible(device: cpu_dn, "brcm,brahma-b53" )) |
318 | rac_flush_offset = B53_RAC_FLUSH_REG; |
319 | else { |
320 | pr_err("Unsupported CPU\n" ); |
321 | of_node_put(node: cpu_dn); |
322 | ret = -EINVAL; |
323 | goto out; |
324 | } |
325 | of_node_put(node: cpu_dn); |
326 | |
327 | ret = register_reboot_notifier(&b15_rac_reboot_nb); |
328 | if (ret) { |
329 | pr_err("failed to register reboot notifier\n" ); |
330 | iounmap(addr: b15_rac_base); |
331 | goto out; |
332 | } |
333 | |
334 | if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { |
335 | ret = cpuhp_setup_state_nocalls(state: CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, |
336 | name: "arm/cache-b15-rac:dead" , |
337 | NULL, teardown: b15_rac_dead_cpu); |
338 | if (ret) |
339 | goto out_unmap; |
340 | |
341 | ret = cpuhp_setup_state_nocalls(state: CPUHP_AP_ARM_CACHE_B15_RAC_DYING, |
342 | name: "arm/cache-b15-rac:dying" , |
343 | NULL, teardown: b15_rac_dying_cpu); |
344 | if (ret) |
345 | goto out_cpu_dead; |
346 | } |
347 | |
348 | if (IS_ENABLED(CONFIG_PM_SLEEP)) |
349 | register_syscore_ops(ops: &b15_rac_syscore_ops); |
350 | |
351 | spin_lock(lock: &rac_lock); |
352 | reg = __raw_readl(addr: b15_rac_base + RAC_CONFIG0_REG); |
353 | for_each_possible_cpu(cpu) |
354 | en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT)); |
355 | WARN(reg & en_mask, "Read-ahead cache not previously disabled\n" ); |
356 | |
357 | b15_rac_enable(); |
358 | set_bit(RAC_ENABLED, addr: &b15_rac_flags); |
359 | spin_unlock(lock: &rac_lock); |
360 | |
361 | pr_info("%pOF: Broadcom Brahma-B15 readahead cache\n" , dn); |
362 | |
363 | goto out; |
364 | |
365 | out_cpu_dead: |
366 | cpuhp_remove_state_nocalls(state: CPUHP_AP_ARM_CACHE_B15_RAC_DYING); |
367 | out_unmap: |
368 | unregister_reboot_notifier(&b15_rac_reboot_nb); |
369 | iounmap(addr: b15_rac_base); |
370 | out: |
371 | of_node_put(node: dn); |
372 | return ret; |
373 | } |
374 | arch_initcall(b15_rac_init); |
375 | |