// SPDX-License-Identifier: GPL-2.0-only
/*
 * L220/L310 cache controller support
 *
 * Copyright (C) 2016 ARM Limited
 */
#include <linux/errno.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/hardware/cache-l2x0.h>

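/*
 * Both the L220 and the PL310 implement a pair of 32-bit event counters,
 * hence the fixed count below (the saturation handling in
 * l2x0_pmu_event_configure() relies on the 32-bit width).
 */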
#define PMU_NR_COUNTERS 2

static void __iomem *l2x0_base;
static struct pmu *l2x0_pmu;
static cpumask_t pmu_cpu;

static const char *l2x0_name;

static ktime_t l2x0_pmu_poll_period;
static struct hrtimer l2x0_pmu_hrtimer;

/*
 * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0.
 * Registers controlling these are laid out in pairs, in descending order, i.e.
 * the register for Counter1 comes first, followed by the register for
 * Counter0.
 * We ensure that idx 0 -> Counter0, and idx 1 -> Counter1.
 */
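/*
 * Concretely, L2X0_EVENT_CNT1_CFG sits one word below L2X0_EVENT_CNT0_CFG,
 * so the accessors below can reach either counter's registers as
 * "Counter0 register - 4 * idx".
 */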
static struct perf_event *events[PMU_NR_COUNTERS];

/* Find an unused counter */
static int l2x0_pmu_find_idx(void)
{
	int i;

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		if (!events[i])
			return i;
	}

	return -1;
}

/* How many counters are allocated? */
static int l2x0_pmu_num_active_counters(void)
{
	int i, cnt = 0;

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		if (events[i])
			cnt++;
	}

	return cnt;
}

static void l2x0_pmu_counter_config_write(int idx, u32 val)
{
	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx);
}

static u32 l2x0_pmu_counter_read(int idx)
{
	return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}

static void l2x0_pmu_counter_write(int idx, u32 val)
{
	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}

static void __l2x0_pmu_enable(void)
{
	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
	val |= L2X0_EVENT_CNT_CTRL_ENABLE;
	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

static void __l2x0_pmu_disable(void)
{
	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
	val &= ~L2X0_EVENT_CNT_CTRL_ENABLE;
	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

static void l2x0_pmu_enable(struct pmu *pmu)
{
	if (l2x0_pmu_num_active_counters() == 0)
		return;

	__l2x0_pmu_enable();
}

static void l2x0_pmu_disable(struct pmu *pmu)
{
	if (l2x0_pmu_num_active_counters() == 0)
		return;

	__l2x0_pmu_disable();
}

static void warn_if_saturated(u32 count)
{
	if (count != 0xffffffff)
		return;

	pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n");
}

static void l2x0_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hw = &event->hw;
	u64 prev_count, new_count, mask;

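	/*
	 * Loop until we observe a consistent (prev_count, new_count) pair:
	 * local64_xchg() publishes the latest hardware value and confirms
	 * that no concurrent reader updated prev_count in the meantime, so
	 * each delta is accounted exactly once.
	 */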
	do {
		prev_count = local64_read(&hw->prev_count);
		new_count = l2x0_pmu_counter_read(hw->idx);
	} while (local64_xchg(&hw->prev_count, new_count) != prev_count);

	mask = GENMASK_ULL(31, 0);
	local64_add((new_count - prev_count) & mask, &event->count);

	warn_if_saturated(new_count);
}

static void l2x0_pmu_event_configure(struct perf_event *event)
{
	struct hw_perf_event *hw = &event->hw;

	/*
	 * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we
	 * will *always* lose some number of events when a counter saturates,
	 * and have no way of detecting how many were lost.
	 *
	 * To minimize the impact of this, we try to maximize the period by
	 * always starting counters at zero. To ensure that group ratios are
	 * representative, we poll periodically to avoid counters saturating.
	 * See l2x0_pmu_poll().
	 */
	local64_set(&hw->prev_count, 0);
	l2x0_pmu_counter_write(hw->idx, 0);
}

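/*
 * Read and reset all active counters with counting briefly disabled, so the
 * pair is sampled back-to-back and group ratios stay representative (see the
 * comment in l2x0_pmu_event_configure()).
 */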
static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer)
{
	unsigned long flags;
	int i;

	local_irq_save(flags);
	__l2x0_pmu_disable();

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		struct perf_event *event = events[i];

		if (!event)
			continue;

		l2x0_pmu_event_read(event);
		l2x0_pmu_event_configure(event);
	}

	__l2x0_pmu_enable();
	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period);
	return HRTIMER_RESTART;
}

static void __l2x0_pmu_event_enable(int idx, u32 event)
{
	u32 val;

	val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
	l2x0_pmu_counter_config_write(idx, val);
}

static void l2x0_pmu_event_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (flags & PERF_EF_RELOAD) {
		WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
		l2x0_pmu_event_configure(event);
	}

	hw->state = 0;

	__l2x0_pmu_event_enable(hw->idx, hw->config_base);
}

static void __l2x0_pmu_event_disable(int idx)
{
	u32 val;

	val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
	l2x0_pmu_counter_config_write(idx, val);
}

static void l2x0_pmu_event_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;

	if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED))
		return;

	__l2x0_pmu_event_disable(hw->idx);

	hw->state |= PERF_HES_STOPPED;

	if (flags & PERF_EF_UPDATE) {
		l2x0_pmu_event_read(event);
		hw->state |= PERF_HES_UPTODATE;
	}
}

static int l2x0_pmu_event_add(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;
	int idx = l2x0_pmu_find_idx();

	if (idx == -1)
		return -EAGAIN;

	/*
	 * Pin the timer, so that the overflows are handled by the chosen
	 * event->cpu (this is the same one as presented in "cpumask"
	 * attribute).
	 */
	if (l2x0_pmu_num_active_counters() == 0)
		hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period,
			      HRTIMER_MODE_REL_PINNED);

	events[idx] = event;
	hw->idx = idx;

	l2x0_pmu_event_configure(event);

	hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	if (flags & PERF_EF_START)
		l2x0_pmu_event_start(event, 0);

	return 0;
}

static void l2x0_pmu_event_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;

	l2x0_pmu_event_stop(event, PERF_EF_UPDATE);

	events[hw->idx] = NULL;
	hw->idx = -1;

	if (l2x0_pmu_num_active_counters() == 0)
		hrtimer_cancel(&l2x0_pmu_hrtimer);
}

static bool l2x0_pmu_group_is_valid(struct perf_event *event)
{
	struct pmu *pmu = event->pmu;
	struct perf_event *leader = event->group_leader;
	struct perf_event *sibling;
	int num_hw = 0;

	if (leader->pmu == pmu)
		num_hw++;
	else if (!is_software_event(leader))
		return false;

	for_each_sibling_event(sibling, leader) {
		if (sibling->pmu == pmu)
			num_hw++;
		else if (!is_software_event(sibling))
			return false;
	}

	return num_hw <= PMU_NR_COUNTERS;
}

static int l2x0_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hw = &event->hw;

	if (event->attr.type != l2x0_pmu->type)
		return -ENOENT;

	if (is_sampling_event(event) ||
	    event->attach_state & PERF_ATTACH_TASK)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK)
		return -EINVAL;

	hw->config_base = event->attr.config;

	if (!l2x0_pmu_group_is_valid(event))
		return -EINVAL;

	event->cpu = cpumask_first(&pmu_cpu);

	return 0;
}

struct l2x0_event_attribute {
	struct device_attribute attr;
	unsigned int config;
	bool pl310_only;
};

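/*
 * Build each event attribute out of an anonymous one-element compound-literal
 * array, which gives the macro a statically-allocated l2x0_event_attribute
 * from which to take the embedded struct attribute.
 */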
#define L2X0_EVENT_ATTR(_name, _config, _pl310_only)				\
	(&((struct l2x0_event_attribute[]) {{					\
		.attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL),	\
		.config = _config,						\
		.pl310_only = _pl310_only,					\
	}})[0].attr.attr)

#define L220_PLUS_EVENT_ATTR(_name, _config)		\
	L2X0_EVENT_ATTR(_name, _config, false)

#define PL310_EVENT_ATTR(_name, _config)		\
	L2X0_EVENT_ATTR(_name, _config, true)

static ssize_t l2x0_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct l2x0_event_attribute *lattr;

	lattr = container_of(attr, typeof(*lattr), attr);
	return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config);
}

static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj,
					      struct attribute *attr,
					      int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pmu *pmu = dev_get_drvdata(dev);
	struct l2x0_event_attribute *lattr;

	lattr = container_of(attr, typeof(*lattr), attr.attr);

	if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0)
		return attr->mode;

	return 0;
}

static struct attribute *l2x0_pmu_event_attrs[] = {
	L220_PLUS_EVENT_ATTR(co, 0x1),
	L220_PLUS_EVENT_ATTR(drhit, 0x2),
	L220_PLUS_EVENT_ATTR(drreq, 0x3),
	L220_PLUS_EVENT_ATTR(dwhit, 0x4),
	L220_PLUS_EVENT_ATTR(dwreq, 0x5),
	L220_PLUS_EVENT_ATTR(dwtreq, 0x6),
	L220_PLUS_EVENT_ATTR(irhit, 0x7),
	L220_PLUS_EVENT_ATTR(irreq, 0x8),
	L220_PLUS_EVENT_ATTR(wa, 0x9),
	PL310_EVENT_ATTR(ipfalloc, 0xa),
	PL310_EVENT_ATTR(epfhit, 0xb),
	PL310_EVENT_ATTR(epfalloc, 0xc),
	PL310_EVENT_ATTR(srrcvd, 0xd),
	PL310_EVENT_ATTR(srconf, 0xe),
	PL310_EVENT_ATTR(epfrcvd, 0xf),
	NULL
};

static struct attribute_group l2x0_pmu_event_attrs_group = {
	.name = "events",
	.attrs = l2x0_pmu_event_attrs,
	.is_visible = l2x0_pmu_event_attr_is_visible,
};

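/*
 * The events above appear under /sys/bus/event_source/devices/<pmu>/events/,
 * so on a PL310 system, for example, they can be counted system-wide with
 * something like:
 *
 *	perf stat -a -e l2c_310/drhit/,l2c_310/drreq/ -- sleep 1
 */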
static ssize_t l2x0_pmu_cpumask_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &pmu_cpu);
}

static struct device_attribute l2x0_pmu_cpumask_attr =
		__ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL);

static struct attribute *l2x0_pmu_cpumask_attrs[] = {
	&l2x0_pmu_cpumask_attr.attr,
	NULL,
};

static struct attribute_group l2x0_pmu_cpumask_attr_group = {
	.attrs = l2x0_pmu_cpumask_attrs,
};

static const struct attribute_group *l2x0_pmu_attr_groups[] = {
	&l2x0_pmu_event_attrs_group,
	&l2x0_pmu_cpumask_attr_group,
	NULL,
};

static void l2x0_pmu_reset(void)
{
	int i;

	__l2x0_pmu_disable();

	for (i = 0; i < PMU_NR_COUNTERS; i++)
		__l2x0_pmu_event_disable(i);
}

static int l2x0_pmu_offline_cpu(unsigned int cpu)
{
	unsigned int target;

	if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu))
		return 0;

	target = cpumask_any_but(cpu_online_mask, cpu);
	if (target >= nr_cpu_ids)
		return 0;

	perf_pmu_migrate_context(l2x0_pmu, cpu, target);
	cpumask_set_cpu(target, &pmu_cpu);

	return 0;
}

void l2x0_pmu_suspend(void)
{
	int i;

	if (!l2x0_pmu)
		return;

	l2x0_pmu_disable(l2x0_pmu);

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		if (events[i])
			l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE);
	}
}

void l2x0_pmu_resume(void)
{
	int i;

	if (!l2x0_pmu)
		return;

	l2x0_pmu_reset();

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		if (events[i])
			l2x0_pmu_event_start(events[i], PERF_EF_RELOAD);
	}

	l2x0_pmu_enable(l2x0_pmu);
}

void __init l2x0_pmu_register(void __iomem *base, u32 part)
{
	/*
	 * Determine whether we support the PMU, and choose the name for sysfs.
	 * This is also used by l2x0_pmu_event_attr_is_visible to determine
	 * which events to display, as the PL310 PMU supports a superset of
	 * L220 events.
	 *
	 * The L210 PMU has a different programmer's interface, and is not
	 * supported by this driver.
	 *
	 * We must defer registering the PMU until the perf subsystem is up and
	 * running, so just stash the name and base, and leave that to another
	 * initcall.
	 */
	switch (part & L2X0_CACHE_ID_PART_MASK) {
	case L2X0_CACHE_ID_PART_L220:
		l2x0_name = "l2c_220";
		break;
	case L2X0_CACHE_ID_PART_L310:
		l2x0_name = "l2c_310";
		break;
	default:
		return;
	}

	l2x0_base = base;
}

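/*
 * The deferred half of registration: by device_initcall() time the perf core
 * is up, so we can allocate and register the PMU whose name and base were
 * stashed by l2x0_pmu_register() (assumed to have been called earlier from
 * the cache controller's init path).
 */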
static __init int l2x0_pmu_init(void)
{
	int ret;

	if (!l2x0_base)
		return 0;

	l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL);
	if (!l2x0_pmu) {
		pr_warn("Unable to allocate L2x0 PMU\n");
		return -ENOMEM;
	}

	*l2x0_pmu = (struct pmu) {
		.task_ctx_nr = perf_invalid_context,
		.pmu_enable = l2x0_pmu_enable,
		.pmu_disable = l2x0_pmu_disable,
		.read = l2x0_pmu_event_read,
		.start = l2x0_pmu_event_start,
		.stop = l2x0_pmu_event_stop,
		.add = l2x0_pmu_event_add,
		.del = l2x0_pmu_event_del,
		.event_init = l2x0_pmu_event_init,
		.attr_groups = l2x0_pmu_attr_groups,
		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
	};

	l2x0_pmu_reset();

	/*
	 * We always use a hrtimer rather than an interrupt.
	 * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll.
	 *
	 * Polling once a second allows the counters to fill up to 1/128th on a
	 * quad-core test chip with cores clocked at 400MHz. Hopefully this
	 * leaves sufficient headroom to avoid overflow on production silicon
	 * at higher frequencies.
	 */
	l2x0_pmu_poll_period = ms_to_ktime(1000);
	hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	l2x0_pmu_hrtimer.function = l2x0_pmu_poll;

	cpumask_set_cpu(0, &pmu_cpu);
	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE,
					"perf/arm/l2x0:online", NULL,
					l2x0_pmu_offline_cpu);
	if (ret)
		goto out_pmu;

	ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1);
	if (ret)
		goto out_cpuhp;

	return 0;

out_cpuhp:
	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE);
out_pmu:
	kfree(l2x0_pmu);
	l2x0_pmu = NULL;
	return ret;
}
device_initcall(l2x0_pmu_init);