// SPDX-License-Identifier: GPL-2.0-only
/*
 * HiSilicon SoC Hardware event counters support
 *
 * Copyright (C) 2017 HiSilicon Limited
 * Author: Anurup M <anurup.m@huawei.com>
 *         Shaokun Zhang <zhangshaokun@hisilicon.com>
 *
 * This code is based on the uncore PMUs like arm-cci and arm-ccn.
 */
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/interrupt.h>

#include <asm/cputype.h>
#include <asm/local64.h>

#include "hisi_uncore_pmu.h"

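/*
 * Maximum value an event counter of the given bit width can hold;
 * e.g. HISI_MAX_PERIOD(32) == 0xffffffff. Used below to mask the
 * delta between two counter reads.
 */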
#define HISI_MAX_PERIOD(nr)	(GENMASK_ULL((nr) - 1, 0))

/*
 * PMU format attributes
 */
ssize_t hisi_format_sysfs_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct dev_ext_attribute *eattr;

	eattr = container_of(attr, struct dev_ext_attribute, attr);

	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
}
EXPORT_SYMBOL_GPL(hisi_format_sysfs_show);

/*
 * PMU event attributes
 */
ssize_t hisi_event_sysfs_show(struct device *dev,
			      struct device_attribute *attr, char *page)
{
	struct dev_ext_attribute *eattr;

	eattr = container_of(attr, struct dev_ext_attribute, attr);

	return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var);
}
EXPORT_SYMBOL_GPL(hisi_event_sysfs_show);

/*
 * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show
 */
ssize_t hisi_cpumask_sysfs_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));

	return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu);
}
EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show);

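/*
 * Check that @event's group fits on this PMU: all hardware events must
 * come from the same PMU, and the group (leader plus hardware siblings)
 * must not need more counters than the hardware provides. Software
 * events are ignored as they never occupy a hardware counter.
 */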
static bool hisi_validate_event_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
	/* Include count for the event */
	int counters = 1;

	if (!is_software_event(leader)) {
		/*
		 * We must NOT create groups containing mixed PMUs, although
		 * software events are acceptable
		 */
		if (leader->pmu != event->pmu)
			return false;

		/* Increment counter for the leader */
		if (leader != event)
			counters++;
	}

	for_each_sibling_event(sibling, event->group_leader) {
		if (is_software_event(sibling))
			continue;
		if (sibling->pmu != event->pmu)
			return false;
		/* Increment counter for each sibling */
		counters++;
	}

	/* The group cannot count more events than there are hardware counters */
	return counters <= hisi_pmu->num_counters;
}

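/*
 * Claim the first free hardware counter for @event, or return -EAGAIN
 * if every counter is already in use.
 */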
int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
	unsigned long *used_mask = hisi_pmu->pmu_events.used_mask;
	u32 num_counters = hisi_pmu->num_counters;
	int idx;

	idx = find_first_zero_bit(used_mask, num_counters);
	if (idx == num_counters)
		return -EAGAIN;

	set_bit(idx, used_mask);

	return idx;
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx);

ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
					     struct device_attribute *attr,
					     char *page)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));

	return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier);
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show);

static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
{
	clear_bit(idx, hisi_pmu->pmu_events.used_mask);
}

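/*
 * Overflow interrupt handler: for each counter whose overflow bit is
 * set, acknowledge the interrupt, fold the current count into the perf
 * event and restart the counter at half its period.
 */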
static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
{
	struct hisi_pmu *hisi_pmu = data;
	struct perf_event *event;
	unsigned long overflown;
	int idx;

	overflown = hisi_pmu->ops->get_int_status(hisi_pmu);
	if (!overflown)
		return IRQ_NONE;

	/*
	 * Find the counter index which overflowed if the bit was set
	 * and handle it.
	 */
	for_each_set_bit(idx, &overflown, hisi_pmu->num_counters) {
		/* Write 1 to clear the IRQ status flag */
		hisi_pmu->ops->clear_int_status(hisi_pmu, idx);
		/* Get the corresponding event struct */
		event = hisi_pmu->pmu_events.hw_events[idx];
		if (!event)
			continue;

		hisi_uncore_pmu_event_update(event);
		hisi_uncore_pmu_set_event_period(event);
	}

	return IRQ_HANDLED;
}

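/*
 * Request the overflow IRQ for this PMU. IRQF_NOBALANCING and
 * IRQF_NO_THREAD keep the handler on the CPU chosen by the hotplug
 * callbacks below.
 */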
int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
			     struct platform_device *pdev)
{
	int irq, ret;

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;

	ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr,
			       IRQF_NOBALANCING | IRQF_NO_THREAD,
			       dev_name(&pdev->dev), hisi_pmu);
	if (ret < 0) {
		dev_err(&pdev->dev,
			"Fail to request IRQ: %d ret: %d.\n", irq, ret);
		return ret;
	}

	hisi_pmu->irq = irq;

	return 0;
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq);

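/*
 * pmu::event_init callback: sanity-check an event before it is ever
 * scheduled onto a counter and bind it to the CPU that owns this PMU.
 */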
int hisi_uncore_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct hisi_pmu *hisi_pmu;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * We do not support sampling as the counters are all
	 * shared by all CPU cores in a CPU die (SCCL). Also we
	 * do not support attaching to a task (per-process mode).
	 */
	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
		return -EOPNOTSUPP;

	/*
	 * The uncore counters are not specific to any CPU, so we cannot
	 * support per-task counting.
	 */
	if (event->cpu < 0)
		return -EINVAL;

	/*
	 * Validate that the events in the group do not exceed the
	 * available counters in hardware.
	 */
	if (!hisi_validate_event_group(event))
		return -EINVAL;

	hisi_pmu = to_hisi_pmu(event->pmu);
	if (event->attr.config > hisi_pmu->check_event)
		return -EINVAL;

	if (hisi_pmu->on_cpu == -1)
		return -EINVAL;
	/*
	 * We don't assign an index until we actually place the event onto
	 * hardware. Use -1 to signify that we haven't decided where to put it
	 * yet.
	 */
	hwc->idx = -1;
	hwc->config_base = event->attr.config;

	if (hisi_pmu->ops->check_filter && hisi_pmu->ops->check_filter(event))
		return -EINVAL;

	/* Enforce the use of the same CPU for all events in this PMU */
	event->cpu = hisi_pmu->on_cpu;

	return 0;
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init);

/*
 * Set the counter to count the event that we're interested in,
 * and enable interrupt and counter.
 */
static void hisi_uncore_pmu_enable_event(struct perf_event *event)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
				    HISI_GET_EVENTID(event));

	if (hisi_pmu->ops->enable_filter)
		hisi_pmu->ops->enable_filter(event);

	hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
	hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
}

/*
 * Disable counter and interrupt.
 */
static void hisi_uncore_pmu_disable_event(struct perf_event *event)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
	hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);

	if (hisi_pmu->ops->disable_filter)
		hisi_pmu->ops->disable_filter(event);
}

void hisi_uncore_pmu_set_event_period(struct perf_event *event)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * The HiSilicon PMU counters support 32 bits or 48 bits, depending on
	 * the PMU. We reduce it to 2^(counter_bits - 1) to account for the
	 * extreme interrupt latency. So we could hopefully handle the overflow
	 * interrupt before another 2^(counter_bits - 1) events occur and the
	 * counter overtakes its previous value.
	 */
	u64 val = BIT_ULL(hisi_pmu->counter_bits - 1);

	local64_set(&hwc->prev_count, val);
	/* Write start value to the hardware event counter */
	hisi_pmu->ops->write_counter(hisi_pmu, hwc, val);
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period);

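/*
 * Fold the delta since the last read of the hardware counter into
 * event->count. The cmpxchg loop makes the read-and-update safe
 * against a racing update from the overflow interrupt handler.
 */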
void hisi_uncore_pmu_event_update(struct perf_event *event)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 delta, prev_raw_count, new_raw_count;

	do {
		/* Read the count from the counter register */
		new_raw_count = hisi_pmu->ops->read_counter(hisi_pmu, hwc);
		prev_raw_count = local64_read(&hwc->prev_count);
	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
				 new_raw_count) != prev_raw_count);
	/*
	 * Compute the delta, masked to the counter width so that a
	 * wrap-around is handled correctly.
	 */
	delta = (new_raw_count - prev_raw_count) &
		HISI_MAX_PERIOD(hisi_pmu->counter_bits);
	local64_add(delta, &event->count);
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update);

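/*
 * pmu::start callback: program the start period, optionally reload the
 * saved counter value (PERF_EF_RELOAD), then enable the counter.
 */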
void hisi_uncore_pmu_start(struct perf_event *event, int flags)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;
	hisi_uncore_pmu_set_event_period(event);

	if (flags & PERF_EF_RELOAD) {
		u64 prev_raw_count = local64_read(&hwc->prev_count);

		hisi_pmu->ops->write_counter(hisi_pmu, hwc, prev_raw_count);
	}

	hisi_uncore_pmu_enable_event(event);
	perf_event_update_userpage(event);
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start);

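/*
 * pmu::stop callback: disable the counter and, unless the count has
 * already been folded in, read back the final value.
 */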
void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	hisi_uncore_pmu_disable_event(event);
	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
	hwc->state |= PERF_HES_STOPPED;

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/* Read hardware counter and update the perf counter statistics */
	hisi_uncore_pmu_event_update(event);
	hwc->state |= PERF_HES_UPTODATE;
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop);

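/*
 * pmu::add callback: claim a free hardware counter for the event and
 * start counting if PERF_EF_START is set.
 */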
int hisi_uncore_pmu_add(struct perf_event *event, int flags)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	/* Get an available counter index for counting */
	idx = hisi_pmu->ops->get_event_idx(event);
	if (idx < 0)
		return idx;

	event->hw.idx = idx;
	hisi_pmu->pmu_events.hw_events[idx] = event;

	if (flags & PERF_EF_START)
		hisi_uncore_pmu_start(event, PERF_EF_RELOAD);

	return 0;
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add);

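/*
 * pmu::del callback: stop the event, fold in the final count and
 * release its hardware counter.
 */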
void hisi_uncore_pmu_del(struct perf_event *event, int flags)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	hisi_uncore_pmu_stop(event, PERF_EF_UPDATE);
	hisi_uncore_pmu_clear_event_idx(hisi_pmu, hwc->idx);
	perf_event_update_userpage(event);
	hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL;
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del);

void hisi_uncore_pmu_read(struct perf_event *event)
{
	/* Read hardware counter and update the perf counter statistics */
	hisi_uncore_pmu_event_update(event);
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read);

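/*
 * pmu::pmu_enable callback: start the hardware only when at least one
 * counter is in use.
 */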
void hisi_uncore_pmu_enable(struct pmu *pmu)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
	bool enabled = !bitmap_empty(hisi_pmu->pmu_events.used_mask,
				     hisi_pmu->num_counters);

	if (!enabled)
		return;

	hisi_pmu->ops->start_counters(hisi_pmu);
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable);

void hisi_uncore_pmu_disable(struct pmu *pmu)
{
	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);

	hisi_pmu->ops->stop_counters(hisi_pmu);
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable);

/*
 * The Super CPU Cluster (SCCL) and CPU Cluster (CCL) IDs can be
 * determined from the MPIDR_EL1, but the encoding varies by CPU:
 *
 * - For MT variants of TSV110:
 *   SCCL is Aff2[7:3], CCL is Aff2[2:0]
 *
 * - For other MT parts:
 *   SCCL is Aff3[7:0], CCL is Aff2[7:0]
 *
 * - For non-MT parts:
 *   SCCL is Aff2[7:0], CCL is Aff1[7:0]
 */
static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp)
{
	u64 mpidr = read_cpuid_mpidr();
	int aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3);
	int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2);
	int aff1 = MPIDR_AFFINITY_LEVEL(mpidr, 1);
	bool mt = mpidr & MPIDR_MT_BITMASK;
	int sccl, ccl;

	if (mt && read_cpuid_part_number() == HISI_CPU_PART_TSV110) {
		sccl = aff2 >> 3;
		ccl = aff2 & 0x7;
	} else if (mt) {
		sccl = aff3;
		ccl = aff2;
	} else {
		sccl = aff2;
		ccl = aff1;
	}

	if (scclp)
		*scclp = sccl;
	if (cclp)
		*cclp = ccl;
}

/*
 * Check whether the CPU is associated with this uncore PMU
 */
static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
{
	int sccl_id, ccl_id;

	/* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */
	if (hisi_pmu->sccl_id == -1)
		return true;

	if (hisi_pmu->ccl_id == -1) {
		/* If CCL_ID is -1, the PMU only shares the same SCCL */
		hisi_read_sccl_and_ccl_id(&sccl_id, NULL);

		return sccl_id == hisi_pmu->sccl_id;
	}

	hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);

	return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id;
}

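/*
 * CPU hotplug online callback: record the CPU as associated with this
 * PMU and, if no owner has been chosen yet, make it the owning CPU and
 * route the overflow interrupt to it.
 */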
int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
						     node);

	if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu))
		return 0;

	cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus);

	/* If another CPU is already managing this PMU, simply return. */
	if (hisi_pmu->on_cpu != -1)
		return 0;

	/* Use this CPU in cpumask for event counting */
	hisi_pmu->on_cpu = cpu;

	/* The overflow interrupt should also be handled on the same CPU */
	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));

	return 0;
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu);

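/*
 * CPU hotplug offline callback: if the departing CPU owns this PMU,
 * migrate the perf context and the IRQ affinity to another online
 * associated CPU, if one exists.
 */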
int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
						     node);
	cpumask_t pmu_online_cpus;
	unsigned int target;

	if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
		return 0;

	/* Nothing to do if this CPU doesn't own the PMU */
	if (hisi_pmu->on_cpu != cpu)
		return 0;

	/* Give up ownership of the PMU */
	hisi_pmu->on_cpu = -1;

	/* Choose a new CPU to migrate ownership of the PMU to */
	cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
		    cpu_online_mask);
	target = cpumask_any_but(&pmu_online_cpus, cpu);
	if (target >= nr_cpu_ids)
		return 0;

	perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
	/* Use this CPU for event counting */
	hisi_pmu->on_cpu = target;
	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));

	return 0;
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu);

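/*
 * Fill in the struct pmu callbacks that are common to all HiSilicon
 * uncore PMUs. The individual driver still owns the ops table, the
 * attribute groups and the final perf_pmu_register() call.
 */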
void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module)
{
	struct pmu *pmu = &hisi_pmu->pmu;

	pmu->module = module;
	pmu->task_ctx_nr = perf_invalid_context;
	pmu->event_init = hisi_uncore_pmu_event_init;
	pmu->pmu_enable = hisi_uncore_pmu_enable;
	pmu->pmu_disable = hisi_uncore_pmu_disable;
	pmu->add = hisi_uncore_pmu_add;
	pmu->del = hisi_uncore_pmu_del;
	pmu->start = hisi_uncore_pmu_start;
	pmu->stop = hisi_uncore_pmu_stop;
	pmu->read = hisi_uncore_pmu_read;
	pmu->attr_groups = hisi_pmu->pmu_events.attr_groups;
	pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE;
}
EXPORT_SYMBOL_GPL(hisi_pmu_init);
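
/*
 * Typical driver usage, as a sketch (the PMU name below is
 * hypothetical):
 *
 *	hisi_pmu_init(hisi_pmu, THIS_MODULE);
 *	ret = perf_pmu_register(&hisi_pmu->pmu, "hisi_sccl1_ddrc0", -1);
 *	if (ret)
 *		return ret;
 */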

MODULE_LICENSE("GPL v2");