1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Marvell CN10K LLC-TAD perf driver |
3 | * |
4 | * Copyright (C) 2021 Marvell |
5 | */ |
6 | |
7 | #define pr_fmt(fmt) "tad_pmu: " fmt |
8 | |
9 | #include <linux/io.h> |
10 | #include <linux/module.h> |
11 | #include <linux/of.h> |
12 | #include <linux/cpuhotplug.h> |
13 | #include <linux/perf_event.h> |
14 | #include <linux/platform_device.h> |
15 | #include <linux/acpi.h> |
16 | |
/*
 * Per-TAD register layout.
 *
 * TAD_PFC(n) is the offset of counter n's value register and TAD_PRF(n) the
 * offset of its control register; consecutive counters are 8 bytes apart.
 * The macro argument is parenthesized so that expressions such as
 * TAD_PFC(a | b) expand with the intended precedence.
 */
#define TAD_PFC_OFFSET		0x800
#define TAD_PFC(counter)	(TAD_PFC_OFFSET | ((counter) << 3))
#define TAD_PRF_OFFSET		0x900
#define TAD_PRF(counter)	(TAD_PRF_OFFSET | ((counter) << 3))
/* Width of the event-select (CNTSEL) field written to TAD_PRF() */
#define TAD_PRF_CNTSEL_MASK	0xFF
/* Number of counter slots managed per PMU instance */
#define TAD_MAX_COUNTERS	8

/* Map an embedded struct pmu pointer back to its enclosing struct tad_pmu */
#define to_tad_pmu(p) (container_of(p, struct tad_pmu, pmu))
25 | |
26 | struct tad_region { |
27 | void __iomem *base; |
28 | }; |
29 | |
30 | struct tad_pmu { |
31 | struct pmu pmu; |
32 | struct tad_region *regions; |
33 | u32 region_cnt; |
34 | unsigned int cpu; |
35 | struct hlist_node node; |
36 | struct perf_event *events[TAD_MAX_COUNTERS]; |
37 | DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS); |
38 | }; |
39 | |
40 | static int tad_pmu_cpuhp_state; |
41 | |
42 | static void tad_pmu_event_counter_read(struct perf_event *event) |
43 | { |
44 | struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu); |
45 | struct hw_perf_event *hwc = &event->hw; |
46 | u32 counter_idx = hwc->idx; |
47 | u64 prev, new; |
48 | int i; |
49 | |
50 | do { |
51 | prev = local64_read(&hwc->prev_count); |
52 | for (i = 0, new = 0; i < tad_pmu->region_cnt; i++) |
53 | new += readq(addr: tad_pmu->regions[i].base + |
54 | TAD_PFC(counter_idx)); |
55 | } while (local64_cmpxchg(l: &hwc->prev_count, old: prev, new) != prev); |
56 | |
57 | local64_add(new - prev, &event->count); |
58 | } |
59 | |
60 | static void tad_pmu_event_counter_stop(struct perf_event *event, int flags) |
61 | { |
62 | struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu); |
63 | struct hw_perf_event *hwc = &event->hw; |
64 | u32 counter_idx = hwc->idx; |
65 | int i; |
66 | |
67 | /* TAD()_PFC() stop counting on the write |
68 | * which sets TAD()_PRF()[CNTSEL] == 0 |
69 | */ |
70 | for (i = 0; i < tad_pmu->region_cnt; i++) { |
71 | writeq_relaxed(0, tad_pmu->regions[i].base + |
72 | TAD_PRF(counter_idx)); |
73 | } |
74 | |
75 | tad_pmu_event_counter_read(event); |
76 | hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; |
77 | } |
78 | |
79 | static void tad_pmu_event_counter_start(struct perf_event *event, int flags) |
80 | { |
81 | struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu); |
82 | struct hw_perf_event *hwc = &event->hw; |
83 | u32 event_idx = event->attr.config; |
84 | u32 counter_idx = hwc->idx; |
85 | u64 reg_val; |
86 | int i; |
87 | |
88 | hwc->state = 0; |
89 | |
90 | /* Typically TAD_PFC() are zeroed to start counting */ |
91 | for (i = 0; i < tad_pmu->region_cnt; i++) |
92 | writeq_relaxed(0, tad_pmu->regions[i].base + |
93 | TAD_PFC(counter_idx)); |
94 | |
95 | /* TAD()_PFC() start counting on the write |
96 | * which sets TAD()_PRF()[CNTSEL] != 0 |
97 | */ |
98 | for (i = 0; i < tad_pmu->region_cnt; i++) { |
99 | reg_val = event_idx & 0xFF; |
100 | writeq_relaxed(reg_val, tad_pmu->regions[i].base + |
101 | TAD_PRF(counter_idx)); |
102 | } |
103 | } |
104 | |
105 | static void tad_pmu_event_counter_del(struct perf_event *event, int flags) |
106 | { |
107 | struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu); |
108 | struct hw_perf_event *hwc = &event->hw; |
109 | int idx = hwc->idx; |
110 | |
111 | tad_pmu_event_counter_stop(event, flags: flags | PERF_EF_UPDATE); |
112 | tad_pmu->events[idx] = NULL; |
113 | clear_bit(nr: idx, addr: tad_pmu->counters_map); |
114 | } |
115 | |
116 | static int tad_pmu_event_counter_add(struct perf_event *event, int flags) |
117 | { |
118 | struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu); |
119 | struct hw_perf_event *hwc = &event->hw; |
120 | int idx; |
121 | |
122 | /* Get a free counter for this event */ |
123 | idx = find_first_zero_bit(addr: tad_pmu->counters_map, TAD_MAX_COUNTERS); |
124 | if (idx == TAD_MAX_COUNTERS) |
125 | return -EAGAIN; |
126 | |
127 | set_bit(nr: idx, addr: tad_pmu->counters_map); |
128 | |
129 | hwc->idx = idx; |
130 | hwc->state = PERF_HES_STOPPED; |
131 | tad_pmu->events[idx] = event; |
132 | |
133 | if (flags & PERF_EF_START) |
134 | tad_pmu_event_counter_start(event, flags); |
135 | |
136 | return 0; |
137 | } |
138 | |
139 | static int tad_pmu_event_init(struct perf_event *event) |
140 | { |
141 | struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu); |
142 | |
143 | if (event->attr.type != event->pmu->type) |
144 | return -ENOENT; |
145 | |
146 | if (!event->attr.disabled) |
147 | return -EINVAL; |
148 | |
149 | if (event->state != PERF_EVENT_STATE_OFF) |
150 | return -EINVAL; |
151 | |
152 | event->cpu = tad_pmu->cpu; |
153 | event->hw.idx = -1; |
154 | event->hw.config_base = event->attr.config; |
155 | |
156 | return 0; |
157 | } |
158 | |
159 | static ssize_t tad_pmu_event_show(struct device *dev, |
160 | struct device_attribute *attr, char *page) |
161 | { |
162 | struct perf_pmu_events_attr *pmu_attr; |
163 | |
164 | pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); |
165 | return sysfs_emit(buf: page, fmt: "event=0x%02llx\n" , pmu_attr->id); |
166 | } |
167 | |
168 | #define TAD_PMU_EVENT_ATTR(name, config) \ |
169 | PMU_EVENT_ATTR_ID(name, tad_pmu_event_show, config) |
170 | |
171 | static struct attribute *tad_pmu_event_attrs[] = { |
172 | TAD_PMU_EVENT_ATTR(tad_none, 0x0), |
173 | TAD_PMU_EVENT_ATTR(tad_req_msh_in_any, 0x1), |
174 | TAD_PMU_EVENT_ATTR(tad_req_msh_in_mn, 0x2), |
175 | TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3), |
176 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_any, 0x4), |
177 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_mn, 0x5), |
178 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_exlmn, 0x6), |
179 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_dss, 0x7), |
180 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_retry_dss, 0x8), |
181 | TAD_PMU_EVENT_ATTR(tad_dat_msh_in_any, 0x9), |
182 | TAD_PMU_EVENT_ATTR(tad_dat_msh_in_dss, 0xa), |
183 | TAD_PMU_EVENT_ATTR(tad_req_msh_out_any, 0xb), |
184 | TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_rd, 0xc), |
185 | TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_wr, 0xd), |
186 | TAD_PMU_EVENT_ATTR(tad_req_msh_out_evict, 0xe), |
187 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_any, 0xf), |
188 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_retry_exlmn, 0x10), |
189 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_retry_mn, 0x11), |
190 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_exlmn, 0x12), |
191 | TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_mn, 0x13), |
192 | TAD_PMU_EVENT_ATTR(tad_snp_msh_out_any, 0x14), |
193 | TAD_PMU_EVENT_ATTR(tad_snp_msh_out_mn, 0x15), |
194 | TAD_PMU_EVENT_ATTR(tad_snp_msh_out_exlmn, 0x16), |
195 | TAD_PMU_EVENT_ATTR(tad_dat_msh_out_any, 0x17), |
196 | TAD_PMU_EVENT_ATTR(tad_dat_msh_out_fill, 0x18), |
197 | TAD_PMU_EVENT_ATTR(tad_dat_msh_out_dss, 0x19), |
198 | TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a), |
199 | TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b), |
200 | TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c), |
201 | TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d), |
202 | TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e), |
203 | TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f), |
204 | TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20), |
205 | TAD_PMU_EVENT_ATTR(tad_dat_rd, 0x21), |
206 | TAD_PMU_EVENT_ATTR(tad_dat_rd_byp, 0x22), |
207 | TAD_PMU_EVENT_ATTR(tad_ifb_occ, 0x23), |
208 | TAD_PMU_EVENT_ATTR(tad_req_occ, 0x24), |
209 | NULL |
210 | }; |
211 | |
212 | static const struct attribute_group tad_pmu_events_attr_group = { |
213 | .name = "events" , |
214 | .attrs = tad_pmu_event_attrs, |
215 | }; |
216 | |
217 | PMU_FORMAT_ATTR(event, "config:0-7" ); |
218 | |
219 | static struct attribute *tad_pmu_format_attrs[] = { |
220 | &format_attr_event.attr, |
221 | NULL |
222 | }; |
223 | |
224 | static struct attribute_group tad_pmu_format_attr_group = { |
225 | .name = "format" , |
226 | .attrs = tad_pmu_format_attrs, |
227 | }; |
228 | |
229 | static ssize_t tad_pmu_cpumask_show(struct device *dev, |
230 | struct device_attribute *attr, char *buf) |
231 | { |
232 | struct tad_pmu *tad_pmu = to_tad_pmu(dev_get_drvdata(dev)); |
233 | |
234 | return cpumap_print_to_pagebuf(list: true, buf, cpumask_of(tad_pmu->cpu)); |
235 | } |
236 | |
237 | static DEVICE_ATTR(cpumask, 0444, tad_pmu_cpumask_show, NULL); |
238 | |
239 | static struct attribute *tad_pmu_cpumask_attrs[] = { |
240 | &dev_attr_cpumask.attr, |
241 | NULL |
242 | }; |
243 | |
244 | static struct attribute_group tad_pmu_cpumask_attr_group = { |
245 | .attrs = tad_pmu_cpumask_attrs, |
246 | }; |
247 | |
248 | static const struct attribute_group *tad_pmu_attr_groups[] = { |
249 | &tad_pmu_events_attr_group, |
250 | &tad_pmu_format_attr_group, |
251 | &tad_pmu_cpumask_attr_group, |
252 | NULL |
253 | }; |
254 | |
255 | static int tad_pmu_probe(struct platform_device *pdev) |
256 | { |
257 | struct device *dev = &pdev->dev; |
258 | struct tad_region *regions; |
259 | struct tad_pmu *tad_pmu; |
260 | struct resource *res; |
261 | u32 tad_pmu_page_size; |
262 | u32 tad_page_size; |
263 | u32 tad_cnt; |
264 | int i, ret; |
265 | char *name; |
266 | |
267 | tad_pmu = devm_kzalloc(dev: &pdev->dev, size: sizeof(*tad_pmu), GFP_KERNEL); |
268 | if (!tad_pmu) |
269 | return -ENOMEM; |
270 | |
271 | platform_set_drvdata(pdev, data: tad_pmu); |
272 | |
273 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
274 | if (!res) { |
275 | dev_err(&pdev->dev, "Mem resource not found\n" ); |
276 | return -ENODEV; |
277 | } |
278 | |
279 | ret = device_property_read_u32(dev, propname: "marvell,tad-page-size" , |
280 | val: &tad_page_size); |
281 | if (ret) { |
282 | dev_err(&pdev->dev, "Can't find tad-page-size property\n" ); |
283 | return ret; |
284 | } |
285 | |
286 | ret = device_property_read_u32(dev, propname: "marvell,tad-pmu-page-size" , |
287 | val: &tad_pmu_page_size); |
288 | if (ret) { |
289 | dev_err(&pdev->dev, "Can't find tad-pmu-page-size property\n" ); |
290 | return ret; |
291 | } |
292 | |
293 | ret = device_property_read_u32(dev, propname: "marvell,tad-cnt" , val: &tad_cnt); |
294 | if (ret) { |
295 | dev_err(&pdev->dev, "Can't find tad-cnt property\n" ); |
296 | return ret; |
297 | } |
298 | |
299 | regions = devm_kcalloc(dev: &pdev->dev, n: tad_cnt, |
300 | size: sizeof(*regions), GFP_KERNEL); |
301 | if (!regions) |
302 | return -ENOMEM; |
303 | |
304 | /* ioremap the distributed TAD pmu regions */ |
305 | for (i = 0; i < tad_cnt && res->start < res->end; i++) { |
306 | regions[i].base = devm_ioremap(dev: &pdev->dev, |
307 | offset: res->start, |
308 | size: tad_pmu_page_size); |
309 | if (!regions[i].base) { |
310 | dev_err(&pdev->dev, "TAD%d ioremap fail\n" , i); |
311 | return -ENOMEM; |
312 | } |
313 | res->start += tad_page_size; |
314 | } |
315 | |
316 | tad_pmu->regions = regions; |
317 | tad_pmu->region_cnt = tad_cnt; |
318 | |
319 | tad_pmu->pmu = (struct pmu) { |
320 | |
321 | .module = THIS_MODULE, |
322 | .attr_groups = tad_pmu_attr_groups, |
323 | .capabilities = PERF_PMU_CAP_NO_EXCLUDE | |
324 | PERF_PMU_CAP_NO_INTERRUPT, |
325 | .task_ctx_nr = perf_invalid_context, |
326 | |
327 | .event_init = tad_pmu_event_init, |
328 | .add = tad_pmu_event_counter_add, |
329 | .del = tad_pmu_event_counter_del, |
330 | .start = tad_pmu_event_counter_start, |
331 | .stop = tad_pmu_event_counter_stop, |
332 | .read = tad_pmu_event_counter_read, |
333 | }; |
334 | |
335 | tad_pmu->cpu = raw_smp_processor_id(); |
336 | |
337 | /* Register pmu instance for cpu hotplug */ |
338 | ret = cpuhp_state_add_instance_nocalls(state: tad_pmu_cpuhp_state, |
339 | node: &tad_pmu->node); |
340 | if (ret) { |
341 | dev_err(&pdev->dev, "Error %d registering hotplug\n" , ret); |
342 | return ret; |
343 | } |
344 | |
345 | name = "tad" ; |
346 | ret = perf_pmu_register(pmu: &tad_pmu->pmu, name, type: -1); |
347 | if (ret) |
348 | cpuhp_state_remove_instance_nocalls(state: tad_pmu_cpuhp_state, |
349 | node: &tad_pmu->node); |
350 | |
351 | return ret; |
352 | } |
353 | |
354 | static void tad_pmu_remove(struct platform_device *pdev) |
355 | { |
356 | struct tad_pmu *pmu = platform_get_drvdata(pdev); |
357 | |
358 | cpuhp_state_remove_instance_nocalls(state: tad_pmu_cpuhp_state, |
359 | node: &pmu->node); |
360 | perf_pmu_unregister(pmu: &pmu->pmu); |
361 | } |
362 | |
/*
 * Firmware match tables. Both are exported with MODULE_DEVICE_TABLE() so the
 * module can be autoloaded on device-tree as well as ACPI systems.
 */
#ifdef CONFIG_OF
static const struct of_device_id tad_pmu_of_match[] = {
	{ .compatible = "marvell,cn10k-tad-pmu", },
	{},
};
MODULE_DEVICE_TABLE(of, tad_pmu_of_match);
#endif

#ifdef CONFIG_ACPI
static const struct acpi_device_id tad_pmu_acpi_match[] = {
	{"MRVL000B", 0},
	{},
};
MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match);
#endif
377 | |
378 | static struct platform_driver tad_pmu_driver = { |
379 | .driver = { |
380 | .name = "cn10k_tad_pmu" , |
381 | .of_match_table = of_match_ptr(tad_pmu_of_match), |
382 | .acpi_match_table = ACPI_PTR(tad_pmu_acpi_match), |
383 | .suppress_bind_attrs = true, |
384 | }, |
385 | .probe = tad_pmu_probe, |
386 | .remove_new = tad_pmu_remove, |
387 | }; |
388 | |
389 | static int tad_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) |
390 | { |
391 | struct tad_pmu *pmu = hlist_entry_safe(node, struct tad_pmu, node); |
392 | unsigned int target; |
393 | |
394 | if (cpu != pmu->cpu) |
395 | return 0; |
396 | |
397 | target = cpumask_any_but(cpu_online_mask, cpu); |
398 | if (target >= nr_cpu_ids) |
399 | return 0; |
400 | |
401 | perf_pmu_migrate_context(pmu: &pmu->pmu, src_cpu: cpu, dst_cpu: target); |
402 | pmu->cpu = target; |
403 | |
404 | return 0; |
405 | } |
406 | |
407 | static int __init tad_pmu_init(void) |
408 | { |
409 | int ret; |
410 | |
411 | ret = cpuhp_setup_state_multi(state: CPUHP_AP_ONLINE_DYN, |
412 | name: "perf/cn10k/tadpmu:online" , |
413 | NULL, |
414 | teardown: tad_pmu_offline_cpu); |
415 | if (ret < 0) |
416 | return ret; |
417 | tad_pmu_cpuhp_state = ret; |
418 | ret = platform_driver_register(&tad_pmu_driver); |
419 | if (ret) |
420 | cpuhp_remove_multi_state(state: tad_pmu_cpuhp_state); |
421 | |
422 | return ret; |
423 | } |
424 | |
425 | static void __exit tad_pmu_exit(void) |
426 | { |
427 | platform_driver_unregister(&tad_pmu_driver); |
428 | cpuhp_remove_multi_state(state: tad_pmu_cpuhp_state); |
429 | } |
430 | |
431 | module_init(tad_pmu_init); |
432 | module_exit(tad_pmu_exit); |
433 | |
434 | MODULE_DESCRIPTION("Marvell CN10K LLC-TAD Perf driver" ); |
435 | MODULE_AUTHOR("Bhaskara Budiredla <bbudiredla@marvell.com>" ); |
436 | MODULE_LICENSE("GPL v2" ); |
437 | |