// SPDX-License-Identifier: GPL-2.0
/*
 * RISC-V performance counter support.
 *
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * This code is based on ARM perf event code which is in turn based on
 * sparc64 and x86 code.
 */

#define pr_fmt(fmt) "riscv-pmu-sbi: " fmt

#include <linux/mod_devicetable.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/platform_device.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of_irq.h>
#include <linux/of.h>
#include <linux/cpu_pm.h>
#include <linux/sched/clock.h>
#include <linux/soc/andes/irq.h>

#include <asm/errata_list.h>
#include <asm/sbi.h>
#include <asm/cpufeature.h>

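/*
 * Read the counter-overflow status CSR. The standard Sscofpmf CSR is used
 * unless an alternative was patched in at boot for the T-Head C9xx erratum
 * or the Andes custom PMU, both of which expose the same information
 * through vendor-specific CSRs.
 */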
#define ALT_SBI_PMU_OVERFLOW(__ovl)					\
asm volatile(ALTERNATIVE_2(						\
	"csrr %0, " __stringify(CSR_SSCOUNTOVF),			\
	"csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF),		\
		THEAD_VENDOR_ID, ERRATA_THEAD_PMU,			\
		CONFIG_ERRATA_THEAD_PMU,				\
	"csrr %0, " __stringify(ANDES_CSR_SCOUNTEROF),			\
		0, RISCV_ISA_EXT_XANDESPMU,				\
		CONFIG_ANDES_CUSTOM_PMU)				\
	: "=r" (__ovl) :						\
	: "memory")

#define ALT_SBI_PMU_OVF_CLEAR_PENDING(__irq_mask)			\
asm volatile(ALTERNATIVE(						\
	"csrc " __stringify(CSR_IP) ", %0\n\t",				\
	"csrc " __stringify(ANDES_CSR_SLIP) ", %0\n\t",			\
		0, RISCV_ISA_EXT_XANDESPMU,				\
		CONFIG_ANDES_CUSTOM_PMU)				\
	: : "r"(__irq_mask)						\
	: "memory")

#define SYSCTL_NO_USER_ACCESS	0
#define SYSCTL_USER_ACCESS	1
#define SYSCTL_LEGACY		2

#define PERF_EVENT_FLAG_NO_USER_ACCESS	BIT(SYSCTL_NO_USER_ACCESS)
#define PERF_EVENT_FLAG_USER_ACCESS	BIT(SYSCTL_USER_ACCESS)
#define PERF_EVENT_FLAG_LEGACY		BIT(SYSCTL_LEGACY)

PMU_FORMAT_ATTR(event, "config:0-47");
PMU_FORMAT_ATTR(firmware, "config:63");

static struct attribute *riscv_arch_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_firmware.attr,
	NULL,
};

static struct attribute_group riscv_pmu_format_group = {
	.name = "format",
	.attrs = riscv_arch_formats_attr,
};

static const struct attribute_group *riscv_pmu_attr_groups[] = {
	&riscv_pmu_format_group,
	NULL,
};

/* Allow user mode access by default */
static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;

/*
 * RISC-V doesn't have heterogeneous harts yet. This needs to become part of
 * per_cpu once harts with different PMU counters are supported.
 */
static union sbi_pmu_ctr_info *pmu_ctr_list;
static bool riscv_pmu_use_irq;
static unsigned int riscv_pmu_irq_num;
static unsigned int riscv_pmu_irq_mask;
static unsigned int riscv_pmu_irq;

/* Cache the available counters in a bitmask */
static unsigned long cmask;

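/*
 * Layout of an event in the SBI PMU event_idx encoding: a 4-bit event type
 * plus type-specific fields for generic hardware and hardware cache events,
 * overlaid on the raw 32-bit event index passed to the SBI firmware.
 */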
struct sbi_pmu_event_data {
	union {
		union {
			struct hw_gen_event {
				uint32_t event_code:16;
				uint32_t event_type:4;
				uint32_t reserved:12;
			} hw_gen_event;
			struct hw_cache_event {
				uint32_t result_id:1;
				uint32_t op_id:2;
				uint32_t cache_id:13;
				uint32_t event_type:4;
				uint32_t reserved:12;
			} hw_cache_event;
		};
		uint32_t event_idx;
	};
};

static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = {
					SBI_PMU_HW_CPU_CYCLES,
					SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_INSTRUCTIONS] = {.hw_gen_event = {
					SBI_PMU_HW_INSTRUCTIONS,
					SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_CACHE_REFERENCES] = {.hw_gen_event = {
					SBI_PMU_HW_CACHE_REFERENCES,
					SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_CACHE_MISSES] = {.hw_gen_event = {
					SBI_PMU_HW_CACHE_MISSES,
					SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {.hw_gen_event = {
					SBI_PMU_HW_BRANCH_INSTRUCTIONS,
					SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_BRANCH_MISSES] = {.hw_gen_event = {
					SBI_PMU_HW_BRANCH_MISSES,
					SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_BUS_CYCLES] = {.hw_gen_event = {
					SBI_PMU_HW_BUS_CYCLES,
					SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {.hw_gen_event = {
					SBI_PMU_HW_STALLED_CYCLES_FRONTEND,
					SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {.hw_gen_event = {
					SBI_PMU_HW_STALLED_CYCLES_BACKEND,
					SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_REF_CPU_CYCLES] = {.hw_gen_event = {
					SBI_PMU_HW_REF_CPU_CYCLES,
					SBI_PMU_EVENT_TYPE_HW, 0}},
};

#define C(x) PERF_COUNT_HW_CACHE_##x
static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
};

static int pmu_sbi_ctr_get_width(int idx)
{
	return pmu_ctr_list[idx].width;
}

static bool pmu_sbi_ctr_is_fw(int cidx)
{
	union sbi_pmu_ctr_info *info;

	info = &pmu_ctr_list[cidx];
	if (!info)
		return false;

	return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false;
}

/*
 * Returns the counter width of a programmable counter and number of hardware
 * counters. As we don't support heterogeneous CPUs yet, it is okay to just
 * return the counter width of the first programmable counter.
 */
int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
{
	int i;
	union sbi_pmu_ctr_info *info;
	u32 hpm_width = 0, hpm_count = 0;

	if (!cmask)
		return -EINVAL;

	for_each_set_bit(i, &cmask, RISCV_MAX_COUNTERS) {
		info = &pmu_ctr_list[i];
		if (!info)
			continue;
		if (!hpm_width && info->csr != CSR_CYCLE && info->csr != CSR_INSTRET)
			hpm_width = info->width;
		if (info->type == SBI_PMU_CTR_TYPE_HW)
			hpm_count++;
	}

	*hw_ctr_width = hpm_width;
	*num_hw_ctr = hpm_count;

	return 0;
}
EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);

static uint8_t pmu_sbi_csr_index(struct perf_event *event)
{
	return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
}

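/*
 * Translate the perf_event_attr exclude_* bits into SBI config-match filter
 * flags: host events use the S/U-mode inhibit flags while guest events
 * (RISCV_PMU_CONFIG1_GUEST_EVENTS) use their VS/VU-mode counterparts.
 */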
static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
{
	unsigned long cflags = 0;
	bool guest_events = false;

	if (event->attr.config1 & RISCV_PMU_CONFIG1_GUEST_EVENTS)
		guest_events = true;
	if (event->attr.exclude_kernel)
		cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VSINH : SBI_PMU_CFG_FLAG_SET_SINH;
	if (event->attr.exclude_user)
		cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VUINH : SBI_PMU_CFG_FLAG_SET_UINH;
	if (guest_events && event->attr.exclude_hv)
		cflags |= SBI_PMU_CFG_FLAG_SET_SINH;
	if (event->attr.exclude_host)
		cflags |= SBI_PMU_CFG_FLAG_SET_UINH | SBI_PMU_CFG_FLAG_SET_SINH;
	if (event->attr.exclude_guest)
		cflags |= SBI_PMU_CFG_FLAG_SET_VSINH | SBI_PMU_CFG_FLAG_SET_VUINH;

	return cflags;
}

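/*
 * Ask the SBI firmware to pick a counter that can monitor this event,
 * honouring the filter flags, and mark the returned counter as used in the
 * per-CPU hardware or firmware counter bitmap.
 */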
static int pmu_sbi_ctr_get_idx(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct sbiret ret;
	int idx;
	uint64_t cbase = 0, cmask = rvpmu->cmask;
	unsigned long cflags = 0;

	cflags = pmu_sbi_get_filter_flags(event);

	/*
	 * In legacy mode, we have to force the fixed counters for those events
	 * but not in the user access mode as we want to use the other counters
	 * that support sampling/filtering.
	 */
	if (hwc->flags & PERF_EVENT_FLAG_LEGACY) {
		if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
			cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
			cmask = 1;
		} else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
			cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
			cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
		}
	}

	/* retrieve the available counter index */
#if defined(CONFIG_32BIT)
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
			cmask, cflags, hwc->event_base, hwc->config,
			hwc->config >> 32);
#else
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
			cmask, cflags, hwc->event_base, hwc->config, 0);
#endif
	if (ret.error) {
		pr_debug("Not able to find a counter for event %lx config %llx\n",
			 hwc->event_base, hwc->config);
		return sbi_err_map_linux_errno(ret.error);
	}

	idx = ret.value;
	if (!test_bit(idx, &rvpmu->cmask) || !pmu_ctr_list[idx].value)
		return -ENOENT;

	/* Additional sanity check for the counter id */
	if (pmu_sbi_ctr_is_fw(idx)) {
		if (!test_and_set_bit(idx, cpuc->used_fw_ctrs))
			return idx;
	} else {
		if (!test_and_set_bit(idx, cpuc->used_hw_ctrs))
			return idx;
	}

	return -ENOENT;
}

static void pmu_sbi_ctr_clear_idx(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	int idx = hwc->idx;

	if (pmu_sbi_ctr_is_fw(idx))
		clear_bit(idx, cpuc->used_fw_ctrs);
	else
		clear_bit(idx, cpuc->used_hw_ctrs);
}

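/*
 * Decode a PERF_TYPE_HW_CACHE config value (cache id in bits 7:0, operation
 * in bits 15:8, result in bits 23:16) and look up the corresponding SBI
 * cache event index.
 */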
static int pmu_event_find_cache(u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx;

	return ret;
}

static bool pmu_sbi_is_fw_event(struct perf_event *event)
{
	u32 type = event->attr.type;
	u64 config = event->attr.config;

	if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1))
		return true;
	else
		return false;
}

static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
{
	u32 type = event->attr.type;
	u64 config = event->attr.config;
	int bSoftware;
	u64 raw_config_val;
	int ret;

	switch (type) {
	case PERF_TYPE_HARDWARE:
		if (config >= PERF_COUNT_HW_MAX)
			return -EINVAL;
		ret = pmu_hw_event_map[event->attr.config].event_idx;
		break;
	case PERF_TYPE_HW_CACHE:
		ret = pmu_event_find_cache(config);
		break;
	case PERF_TYPE_RAW:
		/*
		 * As per the SBI specification, the upper 16 bits must be unused
		 * for a raw event. Use the MSB (bit 63) to distinguish between a
		 * hardware raw event and firmware events.
		 */
		bSoftware = config >> 63;
		raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
		if (bSoftware) {
			ret = (raw_config_val & 0xFFFF) |
				(SBI_PMU_EVENT_TYPE_FW << 16);
		} else {
			ret = RISCV_PMU_RAW_EVENT_IDX;
			*econfig = raw_config_val;
		}
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

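/*
 * Read the current counter value: firmware counters are read through the
 * SBI firmware-read call, hardware counters directly from their CSR (with
 * the upper half read separately on 32-bit).
 */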
static u64 pmu_sbi_ctr_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	struct sbiret ret;
	union sbi_pmu_ctr_info info;
	u64 val = 0;

	if (pmu_sbi_is_fw_event(event)) {
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
				hwc->idx, 0, 0, 0, 0, 0);
		if (!ret.error)
			val = ret.value;
	} else {
		info = pmu_ctr_list[idx];
		val = riscv_pmu_ctr_read_csr(info.csr);
		if (IS_ENABLED(CONFIG_32BIT))
			/* The high-half CSR holds bits 63:32 of the counter */
			val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32 | val;
	}

	return val;
}

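/*
 * Helpers run on each CPU to grant or revoke user-space access to the
 * counter backing an event by toggling its bit in the scounteren CSR.
 */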
static void pmu_sbi_set_scounteren(void *arg)
{
	struct perf_event *event = (struct perf_event *)arg;

	if (event->hw.idx != -1)
		csr_write(CSR_SCOUNTEREN,
			  csr_read(CSR_SCOUNTEREN) | BIT(pmu_sbi_csr_index(event)));
}

static void pmu_sbi_reset_scounteren(void *arg)
{
	struct perf_event *event = (struct perf_event *)arg;

	if (event->hw.idx != -1)
		csr_write(CSR_SCOUNTEREN,
			  csr_read(CSR_SCOUNTEREN) & ~BIT(pmu_sbi_csr_index(event)));
}

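/*
 * Start/stop a single counter through the SBI start/stop calls, also
 * updating scounteren when the event is directly readable from user space.
 */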
static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
{
	struct sbiret ret;
	struct hw_perf_event *hwc = &event->hw;
	unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;

#if defined(CONFIG_32BIT)
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
			1, flag, ival, ival >> 32, 0);
#else
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
			1, flag, ival, 0, 0);
#endif
	if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
		pr_err("Starting counter idx %d failed with error %d\n",
		       hwc->idx, sbi_err_map_linux_errno(ret.error));

	if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
	    (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
		pmu_sbi_set_scounteren((void *)event);
}

static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
{
	struct sbiret ret;
	struct hw_perf_event *hwc = &event->hw;

	if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
	    (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
		pmu_sbi_reset_scounteren((void *)event);

	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
	if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
	    flag != SBI_PMU_STOP_FLAG_RESET)
		pr_err("Stopping counter idx %d failed with error %d\n",
		       hwc->idx, sbi_err_map_linux_errno(ret.error));
}

static int pmu_sbi_find_num_ctrs(void)
{
	struct sbiret ret;

	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
	if (!ret.error)
		return ret.value;
	else
		return sbi_err_map_linux_errno(ret.error);
}

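/*
 * Query the SBI firmware for the description of each counter and cache it
 * in pmu_ctr_list, recording the valid counter indices in *mask.
 */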
static int pmu_sbi_get_ctrinfo(int nctr, unsigned long *mask)
{
	struct sbiret ret;
	int i, num_hw_ctr = 0, num_fw_ctr = 0;
	union sbi_pmu_ctr_info cinfo;

	pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL);
	if (!pmu_ctr_list)
		return -ENOMEM;

	for (i = 0; i < nctr; i++) {
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
		if (ret.error)
			/* The logical counter ids are not expected to be contiguous */
			continue;

		*mask |= BIT(i);

		cinfo.value = ret.value;
		if (cinfo.type == SBI_PMU_CTR_TYPE_FW)
			num_fw_ctr++;
		else
			num_hw_ctr++;
		pmu_ctr_list[i].value = cinfo.value;
	}

	pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr);

	return 0;
}

static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
{
	/*
	 * No need to check the error because we are disabling all the counters
	 * which may include counters that are not enabled yet.
	 */
	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
		  0, pmu->cmask, 0, 0, 0, 0);
}

static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
{
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);

	/* No need to check the error here as we can't do anything about the error */
	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
		  cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
}

/*
 * This function starts all the used counters in a two-step approach.
 * Any counter that did not overflow can be started in a single step, while
 * the overflowed counters need to be restarted with an updated
 * initialization value.
 */
static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
					       unsigned long ctr_ovf_mask)
{
	int idx = 0;
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
	struct perf_event *event;
	unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
	unsigned long ctr_start_mask = 0;
	uint64_t max_period;
	struct hw_perf_event *hwc;
	u64 init_val = 0;

	ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;

	/* Start all the counters that did not overflow in a single shot */
	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
		  0, 0, 0, 0);

	/* Reinitialize and start all the counters that overflowed */
	while (ctr_ovf_mask) {
		if (ctr_ovf_mask & 0x01) {
			event = cpu_hw_evt->events[idx];
			hwc = &event->hw;
			max_period = riscv_pmu_ctr_get_width_mask(event);
			init_val = local64_read(&hwc->prev_count) & max_period;
#if defined(CONFIG_32BIT)
			sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
				  flag, init_val, init_val >> 32, 0);
#else
			sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
				  flag, init_val, 0, 0);
#endif
			perf_event_update_userpage(event);
		}
		ctr_ovf_mask = ctr_ovf_mask >> 1;
		idx++;
	}
}

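/*
 * PMU overflow interrupt handler: stop the hardware counters, read and
 * clear the overflow status, update and re-arm the sampled events that
 * overflowed, then restart the counters.
 */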
static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
{
	struct perf_sample_data data;
	struct pt_regs *regs;
	struct hw_perf_event *hw_evt;
	union sbi_pmu_ctr_info *info;
	int lidx, hidx, fidx;
	struct riscv_pmu *pmu;
	struct perf_event *event;
	unsigned long overflow;
	unsigned long overflowed_ctrs = 0;
	struct cpu_hw_events *cpu_hw_evt = dev;
	u64 start_clock = sched_clock();

	if (WARN_ON_ONCE(!cpu_hw_evt))
		return IRQ_NONE;

	/* Firmware counters don't support overflow yet */
	fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
	if (fidx == RISCV_MAX_COUNTERS) {
		csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
		return IRQ_NONE;
	}

	event = cpu_hw_evt->events[fidx];
	if (!event) {
		ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
		return IRQ_NONE;
	}

	pmu = to_riscv_pmu(event->pmu);
	pmu_sbi_stop_hw_ctrs(pmu);

	/* The overflow status register should only be read after the counters are stopped */
	ALT_SBI_PMU_OVERFLOW(overflow);

	/*
	 * The overflow interrupt pending bit should only be cleared after
	 * stopping all the counters to avoid any race condition.
	 */
	ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);

	/* No overflow bit is set */
	if (!overflow)
		return IRQ_NONE;

	regs = get_irq_regs();

	for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
		struct perf_event *event = cpu_hw_evt->events[lidx];

		/* Skip if the event is invalid or the user did not request sampling */
		if (!event || !is_sampling_event(event))
			continue;

		info = &pmu_ctr_list[lidx];
		/* Do a sanity check */
		if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
			continue;

		/* compute hardware counter index */
		hidx = info->csr - CSR_CYCLE;
		/* check if the corresponding bit is set in sscountovf */
		if (!(overflow & BIT(hidx)))
			continue;

		/*
		 * Keep track of the overflowed counters so that they can be
		 * restarted with an updated initial value.
		 */
		overflowed_ctrs |= BIT(lidx);
		hw_evt = &event->hw;
		riscv_pmu_event_update(event);
		perf_sample_data_init(&data, 0, hw_evt->last_period);
		if (riscv_pmu_event_set_period(event)) {
			/*
			 * Unlike other ISAs, RISC-V doesn't have to disable interrupts
			 * to avoid throttling here. As per the specification, the
			 * interrupt remains disabled until the OF bit is set.
			 * Interrupts are enabled again only during the start.
			 * TODO: We will need to stop the guest counters once
			 * virtualization support is added.
			 */
			perf_event_overflow(event, &data, regs);
		}
	}

	pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
	perf_sample_event_took(sched_clock() - start_clock);

	return IRQ_HANDLED;
}

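/*
 * CPU hotplug callbacks: on bring-up, program scounteren according to the
 * sysctl policy, stop any running counters and enable the per-CPU overflow
 * interrupt; on teardown, disable the interrupt and revoke user access.
 */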
static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);

	/*
	 * We keep enabling userspace access to CYCLE, TIME and INSTRET via the
	 * legacy option but that will be removed in the future.
	 */
	if (sysctl_perf_user_access == SYSCTL_LEGACY)
		csr_write(CSR_SCOUNTEREN, 0x7);
	else
		csr_write(CSR_SCOUNTEREN, 0x2);

	/* Stop all the counters so that they can be enabled from perf */
	pmu_sbi_stop_all(pmu);

	if (riscv_pmu_use_irq) {
		cpu_hw_evt->irq = riscv_pmu_irq;
		ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
		enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
	}

	return 0;
}

static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
{
	if (riscv_pmu_use_irq)
		disable_percpu_irq(riscv_pmu_irq);

	/* Disable all counters access for user mode now */
	csr_write(CSR_SCOUNTEREN, 0x0);

	return 0;
}

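/*
 * Discover which overflow interrupt the platform provides (standard
 * Sscofpmf, T-Head C9xx or Andes custom PMU), map it through the INTC
 * interrupt domain and register the per-CPU overflow handler.
 */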
static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
{
	int ret;
	struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
	struct irq_domain *domain = NULL;

	if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
		riscv_pmu_irq_num = RV_IRQ_PMU;
		riscv_pmu_use_irq = true;
	} else if (IS_ENABLED(CONFIG_ERRATA_THEAD_PMU) &&
		   riscv_cached_mvendorid(0) == THEAD_VENDOR_ID &&
		   riscv_cached_marchid(0) == 0 &&
		   riscv_cached_mimpid(0) == 0) {
		riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU;
		riscv_pmu_use_irq = true;
	} else if (riscv_isa_extension_available(NULL, XANDESPMU) &&
		   IS_ENABLED(CONFIG_ANDES_CUSTOM_PMU)) {
		riscv_pmu_irq_num = ANDES_SLI_CAUSE_BASE + ANDES_RV_IRQ_PMOVI;
		riscv_pmu_use_irq = true;
	}

	riscv_pmu_irq_mask = BIT(riscv_pmu_irq_num % BITS_PER_LONG);

	if (!riscv_pmu_use_irq)
		return -EOPNOTSUPP;

	domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
					  DOMAIN_BUS_ANY);
	if (!domain) {
		pr_err("Failed to find INTC IRQ root domain\n");
		return -ENODEV;
	}

	riscv_pmu_irq = irq_create_mapping(domain, riscv_pmu_irq_num);
	if (!riscv_pmu_irq) {
		pr_err("Failed to map PMU interrupt for node\n");
		return -ENODEV;
	}

	ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
	if (ret) {
		pr_err("registering percpu irq failed [%d]\n", ret);
		return ret;
	}

	return 0;
}

#ifdef CONFIG_CPU_PM
static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
			       void *v)
{
	struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS);
	struct perf_event *event;
	int idx;

	if (!enabled)
		return NOTIFY_OK;

	for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) {
		event = cpuc->events[idx];
		if (!event)
			continue;

		switch (cmd) {
		case CPU_PM_ENTER:
			/*
			 * Stop and update the counter
			 */
			riscv_pmu_stop(event, PERF_EF_UPDATE);
			break;
		case CPU_PM_EXIT:
		case CPU_PM_ENTER_FAILED:
			/*
			 * Restore and enable the counter.
			 */
			riscv_pmu_start(event, PERF_EF_RELOAD);
			break;
		default:
			break;
		}
	}

	return NOTIFY_OK;
}

static int riscv_pm_pmu_register(struct riscv_pmu *pmu)
{
	pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify;
	return cpu_pm_register_notifier(&pmu->riscv_pm_nb);
}

static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu)
{
	cpu_pm_unregister_notifier(&pmu->riscv_pm_nb);
}
#else
static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu) { return 0; }
static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
#endif

static void riscv_pmu_destroy(struct riscv_pmu *pmu)
{
	riscv_pm_pmu_unregister(pmu);
	cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}

static void pmu_sbi_event_init(struct perf_event *event)
{
	/*
	 * The permissions are set at event_init so that we do not depend
	 * on the sysctl value that can change.
	 */
	if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS)
		event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS;
	else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS)
		event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS;
	else
		event->hw.flags |= PERF_EVENT_FLAG_LEGACY;
}

static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
	if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
		return;

	if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
		    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
			return;
		}
	}

	/*
	 * The user mmapped the event to directly access it: this is where
	 * we determine, based on sysctl_perf_user_access, whether we grant
	 * userspace direct access to this event. That means that within the
	 * same task, some events may be directly accessible and some others
	 * may not, if the user changes the value of sysctl_perf_user_access
	 * in the meantime.
	 */

	event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;

	/*
	 * We must enable userspace access *before* advertising in the user page
	 * that it is possible to do so to avoid any race.
	 * And we must notify all cpus here because threads that currently run
	 * on other cpus will try to directly access the counter too without
	 * calling pmu_sbi_ctr_start.
	 */
	if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
		on_each_cpu_mask(mm_cpumask(mm),
				 pmu_sbi_set_scounteren, (void *)event, 1);
}

static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
	if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
		return;

	if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
		    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
			return;
		}
	}

	/*
	 * Here we can directly remove user access since the user does not have
	 * access to the user page anymore so we avoid the racy window where the
	 * user could have read cap_user_rdpmc to true right before we disable
	 * it.
	 */
	event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;

	if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
		on_each_cpu_mask(mm_cpumask(mm),
				 pmu_sbi_reset_scounteren, (void *)event, 1);
}

static void riscv_pmu_update_counter_access(void *info)
{
	if (sysctl_perf_user_access == SYSCTL_LEGACY)
		csr_write(CSR_SCOUNTEREN, 0x7);
	else
		csr_write(CSR_SCOUNTEREN, 0x2);
}

static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
					      int write, void *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int prev = sysctl_perf_user_access;
	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	/*
	 * Test against the previous value since we clear SCOUNTEREN when
	 * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should
	 * not do that if that was already the case.
	 */
	if (ret || !write || prev == sysctl_perf_user_access)
		return ret;

	on_each_cpu(riscv_pmu_update_counter_access, NULL, 1);

	return 0;
}

static struct ctl_table sbi_pmu_sysctl_table[] = {
	{
		.procname	= "perf_user_access",
		.data		= &sysctl_perf_user_access,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= riscv_pmu_proc_user_access_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{ }
};

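/*
 * Probe the SBI PMU: enumerate the counters, set up the overflow interrupt
 * when sampling is supported, wire up the riscv_pmu callbacks and register
 * the PMU with the perf core.
 */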
static int pmu_sbi_device_probe(struct platform_device *pdev)
{
	struct riscv_pmu *pmu = NULL;
	int ret = -ENODEV;
	int num_counters;

	pr_info("SBI PMU extension is available\n");
	pmu = riscv_pmu_alloc();
	if (!pmu)
		return -ENOMEM;

	num_counters = pmu_sbi_find_num_ctrs();
	if (num_counters < 0) {
		pr_err("SBI PMU extension doesn't provide any counters\n");
		goto out_free;
	}

	/* It is possible for SBI to return more than the maximum number of counters */
	if (num_counters > RISCV_MAX_COUNTERS) {
		num_counters = RISCV_MAX_COUNTERS;
		pr_info("SBI returned more than maximum number of counters. Limiting the number of counters to %d\n", num_counters);
	}

	/* cache all the information about counters now */
	if (pmu_sbi_get_ctrinfo(num_counters, &cmask))
		goto out_free;

	ret = pmu_sbi_setup_irqs(pmu, pdev);
	if (ret < 0) {
		pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
	}

	pmu->pmu.attr_groups = riscv_pmu_attr_groups;
	pmu->cmask = cmask;
	pmu->ctr_start = pmu_sbi_ctr_start;
	pmu->ctr_stop = pmu_sbi_ctr_stop;
	pmu->event_map = pmu_sbi_event_map;
	pmu->ctr_get_idx = pmu_sbi_ctr_get_idx;
	pmu->ctr_get_width = pmu_sbi_ctr_get_width;
	pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
	pmu->ctr_read = pmu_sbi_ctr_read;
	pmu->event_init = pmu_sbi_event_init;
	pmu->event_mapped = pmu_sbi_event_mapped;
	pmu->event_unmapped = pmu_sbi_event_unmapped;
	pmu->csr_index = pmu_sbi_csr_index;

	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
	if (ret)
		return ret;

	ret = riscv_pm_pmu_register(pmu);
	if (ret)
		goto out_unregister;

	ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
	if (ret)
		goto out_unregister;

	register_sysctl("kernel", sbi_pmu_sysctl_table);

	return 0;

out_unregister:
	riscv_pmu_destroy(pmu);

out_free:
	kfree(pmu);
	return ret;
}

static struct platform_driver pmu_sbi_driver = {
	.probe		= pmu_sbi_device_probe,
	.driver		= {
		.name	= RISCV_PMU_SBI_PDEV_NAME,
	},
};

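/*
 * Register the CPU hotplug callbacks, the platform driver and a matching
 * platform device once the SBI PMU extension (SBI v0.3+) is detected.
 */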
static int __init pmu_sbi_devinit(void)
{
	int ret;
	struct platform_device *pdev;

	if (sbi_spec_version < sbi_mk_version(0, 3) ||
	    !sbi_probe_extension(SBI_EXT_PMU)) {
		return 0;
	}

	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
				      "perf/riscv/pmu:starting",
				      pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
	if (ret) {
		pr_err("CPU hotplug notifier could not be registered: %d\n",
		       ret);
		return ret;
	}

	ret = platform_driver_register(&pmu_sbi_driver);
	if (ret)
		return ret;

	pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0);
	if (IS_ERR(pdev)) {
		platform_driver_unregister(&pmu_sbi_driver);
		return PTR_ERR(pdev);
	}

	/* Notify legacy implementation that SBI PMU is available */
	riscv_pmu_legacy_skip_init();

	return ret;
}
device_initcall(pmu_sbi_devinit)

