1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * ARMv8 PMUv3 Performance Events handling code. |
4 | * |
5 | * Copyright (C) 2012 ARM Limited |
6 | * Author: Will Deacon <will.deacon@arm.com> |
7 | * |
8 | * This code is based heavily on the ARMv7 perf event code. |
9 | */ |
10 | |
11 | #include <asm/irq_regs.h> |
12 | #include <asm/perf_event.h> |
13 | #include <asm/virt.h> |
14 | |
15 | #include <clocksource/arm_arch_timer.h> |
16 | |
17 | #include <linux/acpi.h> |
18 | #include <linux/bitfield.h> |
19 | #include <linux/clocksource.h> |
20 | #include <linux/of.h> |
21 | #include <linux/perf/arm_pmu.h> |
22 | #include <linux/perf/arm_pmuv3.h> |
23 | #include <linux/platform_device.h> |
24 | #include <linux/sched_clock.h> |
25 | #include <linux/smp.h> |
26 | #include <linux/nmi.h> |
27 | |
28 | #include <asm/arm_pmuv3.h> |
29 | |
30 | /* ARMv8 Cortex-A53 specific event types. */ |
31 | #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 |
32 | |
33 | /* ARMv8 Cavium ThunderX specific event types. */ |
34 | #define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST 0xE9 |
35 | #define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS 0xEA |
36 | #define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS 0xEB |
37 | #define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC |
38 | #define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED |
39 | |
40 | /* |
 * ARMv8 architecturally defined events. Not all of these may be
 * supported on any given implementation; unsupported events will be
 * disabled at run-time based on the PMCEID registers.
44 | */ |
45 | static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { |
46 | PERF_MAP_ALL_UNSUPPORTED, |
47 | [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, |
48 | [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, |
49 | [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, |
50 | [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, |
51 | [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, |
52 | [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, |
53 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, |
54 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, |
55 | }; |
56 | |
57 | static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] |
58 | [PERF_COUNT_HW_CACHE_OP_MAX] |
59 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { |
60 | PERF_CACHE_MAP_ALL_UNSUPPORTED, |
61 | |
62 | [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, |
63 | [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, |
64 | |
65 | [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, |
66 | [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, |
67 | |
68 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, |
69 | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, |
70 | |
71 | [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, |
72 | [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, |
73 | |
74 | [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD, |
75 | [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD, |
76 | |
77 | [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, |
78 | [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, |
79 | }; |
80 | |
81 | static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] |
82 | [PERF_COUNT_HW_CACHE_OP_MAX] |
83 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { |
84 | PERF_CACHE_MAP_ALL_UNSUPPORTED, |
85 | |
86 | [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL, |
87 | |
88 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, |
89 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, |
90 | }; |
91 | |
92 | static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] |
93 | [PERF_COUNT_HW_CACHE_OP_MAX] |
94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { |
95 | PERF_CACHE_MAP_ALL_UNSUPPORTED, |
96 | |
97 | [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, |
98 | [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, |
99 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, |
100 | [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, |
101 | |
102 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, |
103 | [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, |
104 | |
105 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, |
106 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, |
107 | }; |
108 | |
109 | static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] |
110 | [PERF_COUNT_HW_CACHE_OP_MAX] |
111 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { |
112 | PERF_CACHE_MAP_ALL_UNSUPPORTED, |
113 | |
114 | [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, |
115 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, |
116 | }; |
117 | |
118 | static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] |
119 | [PERF_COUNT_HW_CACHE_OP_MAX] |
120 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { |
121 | PERF_CACHE_MAP_ALL_UNSUPPORTED, |
122 | |
123 | [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, |
124 | [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, |
125 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, |
126 | [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST, |
127 | [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS, |
128 | [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS, |
129 | |
130 | [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS, |
131 | [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS, |
132 | |
133 | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, |
134 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, |
135 | [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, |
136 | [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, |
137 | }; |
138 | |
139 | static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] |
140 | [PERF_COUNT_HW_CACHE_OP_MAX] |
141 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { |
142 | PERF_CACHE_MAP_ALL_UNSUPPORTED, |
143 | |
144 | [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, |
145 | [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, |
146 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, |
147 | [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, |
148 | |
149 | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, |
150 | [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, |
151 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, |
152 | [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, |
153 | |
154 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, |
155 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, |
156 | }; |
157 | |
158 | static ssize_t |
159 | armv8pmu_events_sysfs_show(struct device *dev, |
160 | struct device_attribute *attr, char *page) |
161 | { |
162 | struct perf_pmu_events_attr *pmu_attr; |
163 | |
164 | pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); |
165 | |
	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
167 | } |
168 | |
169 | #define ARMV8_EVENT_ATTR(name, config) \ |
170 | PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config) |
171 | |
172 | static struct attribute *armv8_pmuv3_event_attrs[] = { |
173 | /* |
174 | * Don't expose the sw_incr event in /sys. It's not usable as writes to |
175 | * PMSWINC_EL0 will trap as PMUSERENR.{SW,EN}=={0,0} and event rotation |
176 | * means we don't have a fixed event<->counter relationship regardless. |
177 | */ |
178 | ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), |
179 | ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), |
180 | ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), |
181 | ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE), |
182 | ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL), |
183 | ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED), |
184 | ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED), |
185 | ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED), |
186 | ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN), |
187 | ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN), |
188 | ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED), |
189 | ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED), |
190 | ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED), |
191 | ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED), |
192 | ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED), |
193 | ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED), |
194 | ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES), |
195 | ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED), |
196 | ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS), |
197 | ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE), |
198 | ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB), |
199 | ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE), |
200 | ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL), |
201 | ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB), |
202 | ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS), |
203 | ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR), |
204 | ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC), |
205 | ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED), |
206 | ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES), |
207 | /* Don't expose the chain event in /sys, since it's useless in isolation */ |
208 | ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE), |
209 | ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE), |
210 | ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED), |
211 | ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED), |
212 | ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND), |
213 | ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND), |
214 | ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB), |
215 | ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB), |
216 | ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE), |
217 | ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL), |
218 | ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE), |
219 | ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL), |
220 | ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE), |
221 | ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB), |
222 | ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL), |
223 | ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL), |
224 | ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB), |
225 | ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB), |
226 | ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS), |
227 | ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE), |
228 | ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS), |
229 | ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK), |
230 | ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK), |
231 | ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD), |
232 | ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD), |
233 | ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD), |
234 | ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD), |
235 | ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED), |
236 | ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC), |
237 | ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL), |
238 | ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND), |
239 | ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND), |
240 | ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT), |
241 | ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP), |
242 | ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED), |
243 | ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE), |
244 | ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION), |
245 | ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES), |
246 | ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM), |
247 | ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS), |
248 | ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD), |
249 | ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS), |
250 | ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD), |
251 | ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP), |
252 | ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG), |
253 | ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0), |
254 | ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1), |
255 | ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2), |
256 | ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3), |
257 | ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4), |
258 | ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5), |
259 | ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6), |
260 | ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7), |
261 | ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT), |
262 | ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT), |
263 | ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT), |
264 | ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED), |
265 | ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD), |
266 | ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR), |
267 | NULL, |
268 | }; |
269 | |
270 | static umode_t |
271 | armv8pmu_event_attr_is_visible(struct kobject *kobj, |
272 | struct attribute *attr, int unused) |
273 | { |
274 | struct device *dev = kobj_to_dev(kobj); |
275 | struct pmu *pmu = dev_get_drvdata(dev); |
276 | struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); |
277 | struct perf_pmu_events_attr *pmu_attr; |
278 | |
279 | pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); |
280 | |
281 | if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS && |
282 | test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) |
283 | return attr->mode; |
284 | |
285 | if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) { |
286 | u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; |
287 | |
288 | if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS && |
289 | test_bit(id, cpu_pmu->pmceid_ext_bitmap)) |
290 | return attr->mode; |
291 | } |
292 | |
293 | return 0; |
294 | } |
295 | |
296 | static const struct attribute_group armv8_pmuv3_events_attr_group = { |
	.name = "events",
298 | .attrs = armv8_pmuv3_event_attrs, |
299 | .is_visible = armv8pmu_event_attr_is_visible, |
300 | }; |
301 | |
302 | /* User ABI */ |
303 | #define ATTR_CFG_FLD_event_CFG config |
304 | #define ATTR_CFG_FLD_event_LO 0 |
305 | #define ATTR_CFG_FLD_event_HI 15 |
306 | #define ATTR_CFG_FLD_long_CFG config1 |
307 | #define ATTR_CFG_FLD_long_LO 0 |
308 | #define ATTR_CFG_FLD_long_HI 0 |
309 | #define ATTR_CFG_FLD_rdpmc_CFG config1 |
310 | #define ATTR_CFG_FLD_rdpmc_LO 1 |
311 | #define ATTR_CFG_FLD_rdpmc_HI 1 |
312 | #define ATTR_CFG_FLD_threshold_count_CFG config1 /* PMEVTYPER.TC[0] */ |
313 | #define ATTR_CFG_FLD_threshold_count_LO 2 |
314 | #define ATTR_CFG_FLD_threshold_count_HI 2 |
315 | #define ATTR_CFG_FLD_threshold_compare_CFG config1 /* PMEVTYPER.TC[2:1] */ |
316 | #define ATTR_CFG_FLD_threshold_compare_LO 3 |
317 | #define ATTR_CFG_FLD_threshold_compare_HI 4 |
318 | #define ATTR_CFG_FLD_threshold_CFG config1 /* PMEVTYPER.TH */ |
319 | #define ATTR_CFG_FLD_threshold_LO 5 |
320 | #define ATTR_CFG_FLD_threshold_HI 16 |
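
/*
 * Illustrative use of the format fields defined above (the event numbers,
 * threshold values and PMU instance name below are examples only):
 *
 *   perf stat -e armv8_pmuv3_0/event=0x11,long=1/ -- <workload>
 *   perf stat -e armv8_pmuv3_0/event=0x08,threshold=2,threshold_compare=2/ ...
 */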
321 | |
322 | GEN_PMU_FORMAT_ATTR(event); |
323 | GEN_PMU_FORMAT_ATTR(long); |
324 | GEN_PMU_FORMAT_ATTR(rdpmc); |
325 | GEN_PMU_FORMAT_ATTR(threshold_count); |
326 | GEN_PMU_FORMAT_ATTR(threshold_compare); |
327 | GEN_PMU_FORMAT_ATTR(threshold); |
328 | |
329 | static int sysctl_perf_user_access __read_mostly; |
330 | |
331 | static bool armv8pmu_event_is_64bit(struct perf_event *event) |
332 | { |
333 | return ATTR_CFG_GET_FLD(&event->attr, long); |
334 | } |
335 | |
336 | static bool armv8pmu_event_want_user_access(struct perf_event *event) |
337 | { |
338 | return ATTR_CFG_GET_FLD(&event->attr, rdpmc); |
339 | } |
340 | |
341 | static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr) |
342 | { |
343 | u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare); |
344 | u8 th_count = ATTR_CFG_GET_FLD(attr, threshold_count); |
345 | |
346 | /* |
347 | * The count bit is always the bottom bit of the full control field, and |
348 | * the comparison is the upper two bits, but it's not explicitly |
349 | * labelled in the Arm ARM. For the Perf interface we split it into two |
350 | * fields, so reconstruct it here. |
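	 * For example, threshold_compare == 0b10 with threshold_count == 1
	 * reconstructs TC == 0b101.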
351 | */ |
352 | return (th_compare << 1) | th_count; |
353 | } |
354 | |
355 | static struct attribute *armv8_pmuv3_format_attrs[] = { |
356 | &format_attr_event.attr, |
357 | &format_attr_long.attr, |
358 | &format_attr_rdpmc.attr, |
359 | &format_attr_threshold.attr, |
360 | &format_attr_threshold_compare.attr, |
361 | &format_attr_threshold_count.attr, |
362 | NULL, |
363 | }; |
364 | |
365 | static const struct attribute_group armv8_pmuv3_format_attr_group = { |
	.name = "format",
367 | .attrs = armv8_pmuv3_format_attrs, |
368 | }; |
369 | |
370 | static ssize_t slots_show(struct device *dev, struct device_attribute *attr, |
371 | char *page) |
372 | { |
373 | struct pmu *pmu = dev_get_drvdata(dev); |
374 | struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); |
375 | u32 slots = FIELD_GET(ARMV8_PMU_SLOTS, cpu_pmu->reg_pmmir); |
376 | |
	return sysfs_emit(page, "0x%08x\n", slots);
378 | } |
379 | |
380 | static DEVICE_ATTR_RO(slots); |
381 | |
382 | static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr, |
383 | char *page) |
384 | { |
385 | struct pmu *pmu = dev_get_drvdata(dev); |
386 | struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); |
387 | u32 bus_slots = FIELD_GET(ARMV8_PMU_BUS_SLOTS, cpu_pmu->reg_pmmir); |
388 | |
	return sysfs_emit(page, "0x%08x\n", bus_slots);
390 | } |
391 | |
392 | static DEVICE_ATTR_RO(bus_slots); |
393 | |
394 | static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr, |
395 | char *page) |
396 | { |
397 | struct pmu *pmu = dev_get_drvdata(dev); |
398 | struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); |
399 | u32 bus_width = FIELD_GET(ARMV8_PMU_BUS_WIDTH, cpu_pmu->reg_pmmir); |
400 | u32 val = 0; |
401 | |
402 | /* Encoded as Log2(number of bytes), plus one */ |
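	/* e.g. an encoded value of 3 yields 1 << (3 - 1) = 4 bytes */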
403 | if (bus_width > 2 && bus_width < 13) |
404 | val = 1 << (bus_width - 1); |
405 | |
	return sysfs_emit(page, "0x%08x\n", val);
407 | } |
408 | |
409 | static DEVICE_ATTR_RO(bus_width); |
410 | |
411 | static u32 threshold_max(struct arm_pmu *cpu_pmu) |
412 | { |
413 | /* |
414 | * PMMIR.THWIDTH is readable and non-zero on aarch32, but it would be |
415 | * impossible to write the threshold in the upper 32 bits of PMEVTYPER. |
416 | */ |
417 | if (IS_ENABLED(CONFIG_ARM)) |
418 | return 0; |
419 | |
420 | /* |
421 | * The largest value that can be written to PMEVTYPER<n>_EL0.TH is |
422 | * (2 ^ PMMIR.THWIDTH) - 1. |
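	 * For example, THWIDTH == 8 gives a maximum threshold of 255.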
423 | */ |
424 | return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1; |
425 | } |
426 | |
427 | static ssize_t threshold_max_show(struct device *dev, |
428 | struct device_attribute *attr, char *page) |
429 | { |
430 | struct pmu *pmu = dev_get_drvdata(dev); |
431 | struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); |
432 | |
	return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
434 | } |
435 | |
436 | static DEVICE_ATTR_RO(threshold_max); |
437 | |
438 | static struct attribute *armv8_pmuv3_caps_attrs[] = { |
439 | &dev_attr_slots.attr, |
440 | &dev_attr_bus_slots.attr, |
441 | &dev_attr_bus_width.attr, |
442 | &dev_attr_threshold_max.attr, |
443 | NULL, |
444 | }; |
445 | |
446 | static const struct attribute_group armv8_pmuv3_caps_attr_group = { |
	.name = "caps",
448 | .attrs = armv8_pmuv3_caps_attrs, |
449 | }; |
450 | |
451 | /* |
452 | * Perf Events' indices |
453 | */ |
454 | #define ARMV8_IDX_CYCLE_COUNTER 0 |
455 | #define ARMV8_IDX_COUNTER0 1 |
456 | #define ARMV8_IDX_CYCLE_COUNTER_USER 32 |
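
/*
 * Index 0 is the dedicated cycle counter and the programmable event counters
 * start at index 1. For direct user-space access the cycle counter is instead
 * reported as index 32 (see armv8pmu_user_event_idx()).
 */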
457 | |
458 | /* |
459 | * We unconditionally enable ARMv8.5-PMU long event counter support |
460 | * (64-bit events) where supported. Indicate if this arm_pmu has long |
461 | * event counter support. |
462 | * |
463 | * On AArch32, long counters make no sense (you can't access the top |
464 | * bits), so we only enable this on AArch64. |
465 | */ |
466 | static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) |
467 | { |
468 | return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver)); |
469 | } |
470 | |
471 | static bool armv8pmu_event_has_user_read(struct perf_event *event) |
472 | { |
473 | return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT; |
474 | } |
475 | |
476 | /* |
477 | * We must chain two programmable counters for 64 bit events, |
478 | * except when we have allocated the 64bit cycle counter (for CPU |
479 | * cycles event) or when user space counter access is enabled. |
480 | */ |
481 | static bool armv8pmu_event_is_chained(struct perf_event *event) |
482 | { |
483 | int idx = event->hw.idx; |
484 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
485 | |
486 | return !armv8pmu_event_has_user_read(event) && |
487 | armv8pmu_event_is_64bit(event) && |
488 | !armv8pmu_has_long_event(cpu_pmu) && |
489 | (idx != ARMV8_IDX_CYCLE_COUNTER); |
490 | } |
491 | |
492 | /* |
493 | * ARMv8 low level PMU access |
494 | */ |
495 | |
496 | /* |
497 | * Perf Event to low level counters mapping |
498 | */ |
499 | #define ARMV8_IDX_TO_COUNTER(x) \ |
500 | (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) |
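/* e.g. perf idx 1 (ARMV8_IDX_COUNTER0) selects hardware counter PMEVCNTR0 */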
501 | |
502 | static u64 armv8pmu_pmcr_read(void) |
503 | { |
504 | return read_pmcr(); |
505 | } |
506 | |
507 | static void armv8pmu_pmcr_write(u64 val) |
508 | { |
509 | val &= ARMV8_PMU_PMCR_MASK; |
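	/* Ensure earlier PMU register writes take effect before updating PMCR */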
510 | isb(); |
511 | write_pmcr(val); |
512 | } |
513 | |
514 | static int armv8pmu_has_overflowed(u32 pmovsr) |
515 | { |
516 | return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; |
517 | } |
518 | |
519 | static int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) |
520 | { |
521 | return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); |
522 | } |
523 | |
524 | static u64 armv8pmu_read_evcntr(int idx) |
525 | { |
526 | u32 counter = ARMV8_IDX_TO_COUNTER(idx); |
527 | |
528 | return read_pmevcntrn(counter); |
529 | } |
530 | |
531 | static u64 armv8pmu_read_hw_counter(struct perf_event *event) |
532 | { |
533 | int idx = event->hw.idx; |
534 | u64 val = armv8pmu_read_evcntr(idx); |
535 | |
536 | if (armv8pmu_event_is_chained(event)) |
		val = (val << 32) | armv8pmu_read_evcntr(idx - 1);
538 | return val; |
539 | } |
540 | |
541 | /* |
542 | * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP |
543 | * is set the event counters also become 64-bit counters. Unless the |
544 | * user has requested a long counter (attr.config1) then we want to |
545 | * interrupt upon 32-bit overflow - we achieve this by applying a bias. |
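 * The bias sets the upper 32 bits, so the 64-bit counter overflows (and
 * raises the interrupt) after at most 2^32 increments of the 32-bit value.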
546 | */ |
547 | static bool armv8pmu_event_needs_bias(struct perf_event *event) |
548 | { |
549 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
550 | struct hw_perf_event *hwc = &event->hw; |
551 | int idx = hwc->idx; |
552 | |
553 | if (armv8pmu_event_is_64bit(event)) |
554 | return false; |
555 | |
556 | if (armv8pmu_has_long_event(cpu_pmu) || |
557 | idx == ARMV8_IDX_CYCLE_COUNTER) |
558 | return true; |
559 | |
560 | return false; |
561 | } |
562 | |
563 | static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) |
564 | { |
565 | if (armv8pmu_event_needs_bias(event)) |
566 | value |= GENMASK_ULL(63, 32); |
567 | |
568 | return value; |
569 | } |
570 | |
571 | static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) |
572 | { |
573 | if (armv8pmu_event_needs_bias(event)) |
574 | value &= ~GENMASK_ULL(63, 32); |
575 | |
576 | return value; |
577 | } |
578 | |
579 | static u64 armv8pmu_read_counter(struct perf_event *event) |
580 | { |
581 | struct hw_perf_event *hwc = &event->hw; |
582 | int idx = hwc->idx; |
583 | u64 value; |
584 | |
585 | if (idx == ARMV8_IDX_CYCLE_COUNTER) |
586 | value = read_pmccntr(); |
587 | else |
588 | value = armv8pmu_read_hw_counter(event); |
589 | |
590 | return armv8pmu_unbias_long_counter(event, value); |
591 | } |
592 | |
593 | static void armv8pmu_write_evcntr(int idx, u64 value) |
594 | { |
595 | u32 counter = ARMV8_IDX_TO_COUNTER(idx); |
596 | |
597 | write_pmevcntrn(counter, value); |
598 | } |
599 | |
600 | static void armv8pmu_write_hw_counter(struct perf_event *event, |
601 | u64 value) |
602 | { |
603 | int idx = event->hw.idx; |
604 | |
605 | if (armv8pmu_event_is_chained(event)) { |
606 | armv8pmu_write_evcntr(idx, upper_32_bits(value)); |
		armv8pmu_write_evcntr(idx - 1, lower_32_bits(value));
608 | } else { |
609 | armv8pmu_write_evcntr(idx, value); |
610 | } |
611 | } |
612 | |
613 | static void armv8pmu_write_counter(struct perf_event *event, u64 value) |
614 | { |
615 | struct hw_perf_event *hwc = &event->hw; |
616 | int idx = hwc->idx; |
617 | |
618 | value = armv8pmu_bias_long_counter(event, value); |
619 | |
620 | if (idx == ARMV8_IDX_CYCLE_COUNTER) |
621 | write_pmccntr(value); |
622 | else |
623 | armv8pmu_write_hw_counter(event, value); |
624 | } |
625 | |
626 | static void armv8pmu_write_evtype(int idx, unsigned long val) |
627 | { |
628 | u32 counter = ARMV8_IDX_TO_COUNTER(idx); |
629 | unsigned long mask = ARMV8_PMU_EVTYPE_EVENT | |
630 | ARMV8_PMU_INCLUDE_EL2 | |
631 | ARMV8_PMU_EXCLUDE_EL0 | |
632 | ARMV8_PMU_EXCLUDE_EL1; |
633 | |
634 | if (IS_ENABLED(CONFIG_ARM64)) |
635 | mask |= ARMV8_PMU_EVTYPE_TC | ARMV8_PMU_EVTYPE_TH; |
636 | |
637 | val &= mask; |
638 | write_pmevtypern(counter, val); |
639 | } |
640 | |
641 | static void armv8pmu_write_event_type(struct perf_event *event) |
642 | { |
643 | struct hw_perf_event *hwc = &event->hw; |
644 | int idx = hwc->idx; |
645 | |
646 | /* |
647 | * For chained events, the low counter is programmed to count |
648 | * the event of interest and the high counter is programmed |
649 | * with CHAIN event code with filters set to count at all ELs. |
650 | */ |
651 | if (armv8pmu_event_is_chained(event)) { |
652 | u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN | |
653 | ARMV8_PMU_INCLUDE_EL2; |
654 | |
		armv8pmu_write_evtype(idx - 1, hwc->config_base);
		armv8pmu_write_evtype(idx, chain_evt);
657 | } else { |
658 | if (idx == ARMV8_IDX_CYCLE_COUNTER) |
659 | write_pmccfiltr(hwc->config_base); |
660 | else |
			armv8pmu_write_evtype(idx, hwc->config_base);
662 | } |
663 | } |
664 | |
665 | static u32 armv8pmu_event_cnten_mask(struct perf_event *event) |
666 | { |
667 | int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); |
668 | u32 mask = BIT(counter); |
669 | |
670 | if (armv8pmu_event_is_chained(event)) |
671 | mask |= BIT(counter - 1); |
672 | return mask; |
673 | } |
674 | |
675 | static void armv8pmu_enable_counter(u32 mask) |
676 | { |
677 | /* |
678 | * Make sure event configuration register writes are visible before we |
679 | * enable the counter. |
	 */
681 | isb(); |
682 | write_pmcntenset(mask); |
683 | } |
684 | |
685 | static void armv8pmu_enable_event_counter(struct perf_event *event) |
686 | { |
687 | struct perf_event_attr *attr = &event->attr; |
688 | u32 mask = armv8pmu_event_cnten_mask(event); |
689 | |
690 | kvm_set_pmu_events(mask, attr); |
691 | |
692 | /* We rely on the hypervisor switch code to enable guest counters */ |
693 | if (!kvm_pmu_counter_deferred(attr)) |
694 | armv8pmu_enable_counter(mask); |
695 | } |
696 | |
697 | static void armv8pmu_disable_counter(u32 mask) |
698 | { |
699 | write_pmcntenclr(mask); |
700 | /* |
701 | * Make sure the effects of disabling the counter are visible before we |
702 | * start configuring the event. |
703 | */ |
704 | isb(); |
705 | } |
706 | |
707 | static void armv8pmu_disable_event_counter(struct perf_event *event) |
708 | { |
709 | struct perf_event_attr *attr = &event->attr; |
710 | u32 mask = armv8pmu_event_cnten_mask(event); |
711 | |
712 | kvm_clr_pmu_events(mask); |
713 | |
714 | /* We rely on the hypervisor switch code to disable guest counters */ |
715 | if (!kvm_pmu_counter_deferred(attr)) |
716 | armv8pmu_disable_counter(mask); |
717 | } |
718 | |
719 | static void armv8pmu_enable_intens(u32 mask) |
720 | { |
721 | write_pmintenset(mask); |
722 | } |
723 | |
724 | static void armv8pmu_enable_event_irq(struct perf_event *event) |
725 | { |
726 | u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); |
727 | armv8pmu_enable_intens(BIT(counter)); |
728 | } |
729 | |
730 | static void armv8pmu_disable_intens(u32 mask) |
731 | { |
732 | write_pmintenclr(mask); |
733 | isb(); |
734 | /* Clear the overflow flag in case an interrupt is pending. */ |
735 | write_pmovsclr(mask); |
736 | isb(); |
737 | } |
738 | |
739 | static void armv8pmu_disable_event_irq(struct perf_event *event) |
740 | { |
741 | u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); |
742 | armv8pmu_disable_intens(BIT(counter)); |
743 | } |
744 | |
745 | static u32 armv8pmu_getreset_flags(void) |
746 | { |
747 | u32 value; |
748 | |
749 | /* Read */ |
750 | value = read_pmovsclr(); |
751 | |
752 | /* Write to clear flags */ |
753 | value &= ARMV8_PMU_OVERFLOWED_MASK; |
754 | write_pmovsclr(value); |
755 | |
756 | return value; |
757 | } |
758 | |
759 | static void update_pmuserenr(u64 val) |
760 | { |
761 | lockdep_assert_irqs_disabled(); |
762 | |
763 | /* |
764 | * The current PMUSERENR_EL0 value might be the value for the guest. |
	 * If that's the case, have KVM keep track of the register value
766 | * for the host EL0 so that KVM can restore it before returning to |
767 | * the host EL0. Otherwise, update the register now. |
768 | */ |
769 | if (kvm_set_pmuserenr(val)) |
770 | return; |
771 | |
772 | write_pmuserenr(val); |
773 | } |
774 | |
775 | static void armv8pmu_disable_user_access(void) |
776 | { |
	update_pmuserenr(0);
778 | } |
779 | |
780 | static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) |
781 | { |
782 | int i; |
783 | struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); |
784 | |
785 | /* Clear any unused counters to avoid leaking their contents */ |
786 | for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) { |
787 | if (i == ARMV8_IDX_CYCLE_COUNTER) |
788 | write_pmccntr(0); |
789 | else |
			armv8pmu_write_evcntr(i, 0);
791 | } |
792 | |
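	/* Allow EL0 to read the event counters (ER) and the cycle counter (CR) */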
793 | update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); |
794 | } |
795 | |
796 | static void armv8pmu_enable_event(struct perf_event *event) |
797 | { |
798 | /* |
799 | * Enable counter and interrupt, and set the counter to count |
800 | * the event that we're interested in. |
801 | */ |
802 | armv8pmu_disable_event_counter(event); |
803 | armv8pmu_write_event_type(event); |
804 | armv8pmu_enable_event_irq(event); |
805 | armv8pmu_enable_event_counter(event); |
806 | } |
807 | |
808 | static void armv8pmu_disable_event(struct perf_event *event) |
809 | { |
810 | armv8pmu_disable_event_counter(event); |
811 | armv8pmu_disable_event_irq(event); |
812 | } |
813 | |
814 | static void armv8pmu_start(struct arm_pmu *cpu_pmu) |
815 | { |
816 | struct perf_event_context *ctx; |
817 | int nr_user = 0; |
818 | |
819 | ctx = perf_cpu_task_ctx(); |
820 | if (ctx) |
821 | nr_user = ctx->nr_user; |
822 | |
823 | if (sysctl_perf_user_access && nr_user) |
824 | armv8pmu_enable_user_access(cpu_pmu); |
825 | else |
826 | armv8pmu_disable_user_access(); |
827 | |
828 | /* Enable all counters */ |
	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
830 | |
831 | kvm_vcpu_pmu_resync_el0(); |
832 | } |
833 | |
834 | static void armv8pmu_stop(struct arm_pmu *cpu_pmu) |
835 | { |
836 | /* Disable all counters */ |
	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
838 | } |
839 | |
840 | static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) |
841 | { |
842 | u32 pmovsr; |
843 | struct perf_sample_data data; |
844 | struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); |
845 | struct pt_regs *regs; |
846 | int idx; |
847 | |
848 | /* |
849 | * Get and reset the IRQ flags |
850 | */ |
851 | pmovsr = armv8pmu_getreset_flags(); |
852 | |
853 | /* |
854 | * Did an overflow occur? |
855 | */ |
856 | if (!armv8pmu_has_overflowed(pmovsr)) |
857 | return IRQ_NONE; |
858 | |
859 | /* |
860 | * Handle the counter(s) overflow(s) |
861 | */ |
862 | regs = get_irq_regs(); |
863 | |
864 | /* |
865 | * Stop the PMU while processing the counter overflows |
866 | * to prevent skews in group events. |
867 | */ |
868 | armv8pmu_stop(cpu_pmu); |
869 | for (idx = 0; idx < cpu_pmu->num_events; ++idx) { |
870 | struct perf_event *event = cpuc->events[idx]; |
871 | struct hw_perf_event *hwc; |
872 | |
873 | /* Ignore if we don't have an event. */ |
874 | if (!event) |
875 | continue; |
876 | |
877 | /* |
878 | * We have a single interrupt for all counters. Check that |
879 | * each counter has overflowed before we process it. |
880 | */ |
		if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
882 | continue; |
883 | |
884 | hwc = &event->hw; |
885 | armpmu_event_update(event); |
		perf_sample_data_init(&data, 0, hwc->last_period);
887 | if (!armpmu_event_set_period(event)) |
888 | continue; |
889 | |
890 | /* |
891 | * Perf event overflow will queue the processing of the event as |
892 | * an irq_work which will be taken care of in the handling of |
893 | * IPI_IRQ_WORK. |
894 | */ |
		if (perf_event_overflow(event, &data, regs))
896 | cpu_pmu->disable(event); |
897 | } |
898 | armv8pmu_start(cpu_pmu); |
899 | |
900 | return IRQ_HANDLED; |
901 | } |
902 | |
903 | static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, |
904 | struct arm_pmu *cpu_pmu) |
905 | { |
906 | int idx; |
907 | |
908 | for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) { |
		if (!test_and_set_bit(idx, cpuc->used_mask))
910 | return idx; |
911 | } |
912 | return -EAGAIN; |
913 | } |
914 | |
915 | static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, |
916 | struct arm_pmu *cpu_pmu) |
917 | { |
918 | int idx; |
919 | |
920 | /* |
921 | * Chaining requires two consecutive event counters, where |
922 | * the lower idx must be even. |
923 | */ |
924 | for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { |
		if (!test_and_set_bit(idx, cpuc->used_mask)) {
			/* Check if the preceding even counter is available */
			if (!test_and_set_bit(idx - 1, cpuc->used_mask))
				return idx;
			/* Release the odd counter */
			clear_bit(idx, cpuc->used_mask);
931 | } |
932 | } |
933 | return -EAGAIN; |
934 | } |
935 | |
936 | static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, |
937 | struct perf_event *event) |
938 | { |
939 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
940 | struct hw_perf_event *hwc = &event->hw; |
941 | unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; |
942 | |
	/* Always prefer to place a CPU cycles event into the cycle counter. */
944 | if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { |
		if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
946 | return ARMV8_IDX_CYCLE_COUNTER; |
947 | else if (armv8pmu_event_is_64bit(event) && |
948 | armv8pmu_event_want_user_access(event) && |
949 | !armv8pmu_has_long_event(cpu_pmu)) |
950 | return -EAGAIN; |
951 | } |
952 | |
953 | /* |
	 * Otherwise use the event counters
955 | */ |
956 | if (armv8pmu_event_is_chained(event)) |
957 | return armv8pmu_get_chain_idx(cpuc, cpu_pmu); |
958 | else |
959 | return armv8pmu_get_single_idx(cpuc, cpu_pmu); |
960 | } |
961 | |
962 | static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, |
963 | struct perf_event *event) |
964 | { |
965 | int idx = event->hw.idx; |
966 | |
	clear_bit(idx, cpuc->used_mask);
	if (armv8pmu_event_is_chained(event))
		clear_bit(idx - 1, cpuc->used_mask);
970 | } |
971 | |
972 | static int armv8pmu_user_event_idx(struct perf_event *event) |
973 | { |
974 | if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event)) |
975 | return 0; |
976 | |
977 | /* |
978 | * We remap the cycle counter index to 32 to |
979 | * match the offset applied to the rest of |
980 | * the counter indices. |
981 | */ |
982 | if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER) |
983 | return ARMV8_IDX_CYCLE_COUNTER_USER; |
984 | |
985 | return event->hw.idx; |
986 | } |
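
/*
 * A rough sketch of the resulting user-space protocol (see the
 * perf_event_mmap_page documentation and tools/lib/perf): after mmap()ing the
 * event, user space reads pc->index. An index of 0 means direct access is
 * unavailable, 32 (ARMV8_IDX_CYCLE_COUNTER_USER) selects PMCCNTR_EL0, and any
 * other value n selects PMEVCNTR<n-1>_EL0.
 */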
987 | |
988 | /* |
989 | * Add an event filter to a given event. |
990 | */ |
991 | static int armv8pmu_set_event_filter(struct hw_perf_event *event, |
992 | struct perf_event_attr *attr) |
993 | { |
994 | unsigned long config_base = 0; |
995 | struct perf_event *perf_event = container_of(attr, struct perf_event, |
996 | attr); |
997 | struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu); |
998 | u32 th; |
999 | |
1000 | if (attr->exclude_idle) { |
1001 | pr_debug("ARM performance counters do not support mode exclusion\n" ); |
1002 | return -EOPNOTSUPP; |
1003 | } |
1004 | |
1005 | /* |
1006 | * If we're running in hyp mode, then we *are* the hypervisor. |
1007 | * Therefore we ignore exclude_hv in this configuration, since |
1008 | * there's no hypervisor to sample anyway. This is consistent |
1009 | * with other architectures (x86 and Power). |
1010 | */ |
1011 | if (is_kernel_in_hyp_mode()) { |
1012 | if (!attr->exclude_kernel && !attr->exclude_host) |
1013 | config_base |= ARMV8_PMU_INCLUDE_EL2; |
1014 | if (attr->exclude_guest) |
1015 | config_base |= ARMV8_PMU_EXCLUDE_EL1; |
1016 | if (attr->exclude_host) |
1017 | config_base |= ARMV8_PMU_EXCLUDE_EL0; |
1018 | } else { |
1019 | if (!attr->exclude_hv && !attr->exclude_host) |
1020 | config_base |= ARMV8_PMU_INCLUDE_EL2; |
1021 | } |
1022 | |
1023 | /* |
1024 | * Filter out !VHE kernels and guest kernels |
1025 | */ |
1026 | if (attr->exclude_kernel) |
1027 | config_base |= ARMV8_PMU_EXCLUDE_EL1; |
1028 | |
1029 | if (attr->exclude_user) |
1030 | config_base |= ARMV8_PMU_EXCLUDE_EL0; |
1031 | |
1032 | /* |
1033 | * If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will |
1034 | * be 0 and will also trigger this check, preventing it from being used. |
1035 | */ |
1036 | th = ATTR_CFG_GET_FLD(attr, threshold); |
1037 | if (th > threshold_max(cpu_pmu)) { |
1038 | pr_debug("PMU event threshold exceeds max value\n" ); |
1039 | return -EINVAL; |
1040 | } |
1041 | |
1042 | if (IS_ENABLED(CONFIG_ARM64) && th) { |
1043 | config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th); |
1044 | config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC, |
1045 | armv8pmu_event_threshold_control(attr)); |
1046 | } |
1047 | |
1048 | /* |
1049 | * Install the filter into config_base as this is used to |
1050 | * construct the event type. |
1051 | */ |
1052 | event->config_base = config_base; |
1053 | |
1054 | return 0; |
1055 | } |
1056 | |
1057 | static void armv8pmu_reset(void *info) |
1058 | { |
1059 | struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; |
1060 | u64 pmcr; |
1061 | |
1062 | /* The counter and interrupt enable registers are unknown at reset. */ |
1063 | armv8pmu_disable_counter(U32_MAX); |
1064 | armv8pmu_disable_intens(U32_MAX); |
1065 | |
1066 | /* Clear the counters we flip at guest entry/exit */ |
1067 | kvm_clr_pmu_events(U32_MAX); |
1068 | |
1069 | /* |
1070 | * Initialize & Reset PMNC. Request overflow interrupt for |
1071 | * 64 bit cycle counter but cheat in armv8pmu_write_counter(). |
1072 | */ |
1073 | pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; |
1074 | |
1075 | /* Enable long event counter support where available */ |
1076 | if (armv8pmu_has_long_event(cpu_pmu)) |
1077 | pmcr |= ARMV8_PMU_PMCR_LP; |
1078 | |
	armv8pmu_pmcr_write(pmcr);
1080 | } |
1081 | |
1082 | static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu, |
1083 | struct perf_event *event) |
1084 | { |
1085 | if (event->attr.type == PERF_TYPE_HARDWARE && |
1086 | event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) { |
1087 | |
1088 | if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, |
1089 | armpmu->pmceid_bitmap)) |
1090 | return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED; |
1091 | |
1092 | if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED, |
1093 | armpmu->pmceid_bitmap)) |
1094 | return ARMV8_PMUV3_PERFCTR_BR_RETIRED; |
1095 | |
1096 | return HW_OP_UNSUPPORTED; |
1097 | } |
1098 | |
1099 | return armpmu_map_event(event, &armv8_pmuv3_perf_map, |
1100 | &armv8_pmuv3_perf_cache_map, |
1101 | ARMV8_PMU_EVTYPE_EVENT); |
1102 | } |
1103 | |
static int __armv8_pmuv3_map_event(struct perf_event *event,
				   const unsigned (*extra_event_map)
						  [PERF_COUNT_HW_MAX],
				   const unsigned (*extra_cache_map)
						  [PERF_COUNT_HW_CACHE_MAX]
						  [PERF_COUNT_HW_CACHE_OP_MAX]
						  [PERF_COUNT_HW_CACHE_RESULT_MAX])
1111 | { |
1112 | int hw_event_id; |
1113 | struct arm_pmu *armpmu = to_arm_pmu(event->pmu); |
1114 | |
1115 | hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event); |
1116 | |
1117 | /* |
1118 | * CHAIN events only work when paired with an adjacent counter, and it |
1119 | * never makes sense for a user to open one in isolation, as they'll be |
1120 | * rotated arbitrarily. |
1121 | */ |
1122 | if (hw_event_id == ARMV8_PMUV3_PERFCTR_CHAIN) |
1123 | return -EINVAL; |
1124 | |
1125 | if (armv8pmu_event_is_64bit(event)) |
1126 | event->hw.flags |= ARMPMU_EVT_64BIT; |
1127 | |
1128 | /* |
1129 | * User events must be allocated into a single counter, and so |
1130 | * must not be chained. |
1131 | * |
1132 | * Most 64-bit events require long counter support, but 64-bit |
1133 | * CPU_CYCLES events can be placed into the dedicated cycle |
1134 | * counter when this is free. |
1135 | */ |
1136 | if (armv8pmu_event_want_user_access(event)) { |
1137 | if (!(event->attach_state & PERF_ATTACH_TASK)) |
1138 | return -EINVAL; |
1139 | if (armv8pmu_event_is_64bit(event) && |
1140 | (hw_event_id != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && |
		    !armv8pmu_has_long_event(armpmu))
1142 | return -EOPNOTSUPP; |
1143 | |
1144 | event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; |
1145 | } |
1146 | |
1147 | /* Only expose micro/arch events supported by this PMU */ |
1148 | if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) |
1149 | && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { |
1150 | return hw_event_id; |
1151 | } |
1152 | |
1153 | return armpmu_map_event(event, extra_event_map, extra_cache_map, |
1154 | ARMV8_PMU_EVTYPE_EVENT); |
1155 | } |
1156 | |
1157 | static int armv8_pmuv3_map_event(struct perf_event *event) |
1158 | { |
1159 | return __armv8_pmuv3_map_event(event, NULL, NULL); |
1160 | } |
1161 | |
1162 | static int armv8_a53_map_event(struct perf_event *event) |
1163 | { |
	return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map);
1165 | } |
1166 | |
1167 | static int armv8_a57_map_event(struct perf_event *event) |
1168 | { |
	return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map);
1170 | } |
1171 | |
1172 | static int armv8_a73_map_event(struct perf_event *event) |
1173 | { |
	return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map);
1175 | } |
1176 | |
1177 | static int armv8_thunder_map_event(struct perf_event *event) |
1178 | { |
	return __armv8_pmuv3_map_event(event, NULL,
				       &armv8_thunder_perf_cache_map);
1181 | } |
1182 | |
1183 | static int armv8_vulcan_map_event(struct perf_event *event) |
1184 | { |
	return __armv8_pmuv3_map_event(event, NULL,
				       &armv8_vulcan_perf_cache_map);
1187 | } |
1188 | |
1189 | struct armv8pmu_probe_info { |
1190 | struct arm_pmu *pmu; |
1191 | bool present; |
1192 | }; |
1193 | |
1194 | static void __armv8pmu_probe_pmu(void *info) |
1195 | { |
1196 | struct armv8pmu_probe_info *probe = info; |
1197 | struct arm_pmu *cpu_pmu = probe->pmu; |
1198 | u64 pmceid_raw[2]; |
1199 | u32 pmceid[2]; |
1200 | int pmuver; |
1201 | |
1202 | pmuver = read_pmuver(); |
1203 | if (!pmuv3_implemented(pmuver)) |
1204 | return; |
1205 | |
1206 | cpu_pmu->pmuver = pmuver; |
1207 | probe->present = true; |
1208 | |
	/* Read the number of CNTx counters supported from PMNC */
1210 | cpu_pmu->num_events = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()); |
1211 | |
1212 | /* Add the CPU cycles counter */ |
1213 | cpu_pmu->num_events += 1; |
1214 | |
1215 | pmceid[0] = pmceid_raw[0] = read_pmceid0(); |
1216 | pmceid[1] = pmceid_raw[1] = read_pmceid1(); |
1217 | |
1218 | bitmap_from_arr32(cpu_pmu->pmceid_bitmap, |
1219 | pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); |
1220 | |
1221 | pmceid[0] = pmceid_raw[0] >> 32; |
1222 | pmceid[1] = pmceid_raw[1] >> 32; |
1223 | |
1224 | bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, |
1225 | pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); |
1226 | |
1227 | /* store PMMIR register for sysfs */ |
1228 | if (is_pmuv3p4(pmuver)) |
1229 | cpu_pmu->reg_pmmir = read_pmmir(); |
1230 | else |
1231 | cpu_pmu->reg_pmmir = 0; |
1232 | } |
1233 | |
1234 | static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) |
1235 | { |
1236 | struct armv8pmu_probe_info probe = { |
1237 | .pmu = cpu_pmu, |
1238 | .present = false, |
1239 | }; |
1240 | int ret; |
1241 | |
	ret = smp_call_function_any(&cpu_pmu->supported_cpus,
				    __armv8pmu_probe_pmu,
				    &probe, 1);
1245 | if (ret) |
1246 | return ret; |
1247 | |
1248 | return probe.present ? 0 : -ENODEV; |
1249 | } |
1250 | |
1251 | static void armv8pmu_disable_user_access_ipi(void *unused) |
1252 | { |
1253 | armv8pmu_disable_user_access(); |
1254 | } |
1255 | |
1256 | static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write, |
1257 | void *buffer, size_t *lenp, loff_t *ppos) |
1258 | { |
1259 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
1260 | if (ret || !write || sysctl_perf_user_access) |
1261 | return ret; |
1262 | |
	on_each_cpu(armv8pmu_disable_user_access_ipi, NULL, 1);
1264 | return 0; |
1265 | } |
1266 | |
1267 | static struct ctl_table armv8_pmu_sysctl_table[] = { |
1268 | { |
		.procname = "perf_user_access",
1270 | .data = &sysctl_perf_user_access, |
1271 | .maxlen = sizeof(unsigned int), |
1272 | .mode = 0644, |
1273 | .proc_handler = armv8pmu_proc_user_access_handler, |
1274 | .extra1 = SYSCTL_ZERO, |
1275 | .extra2 = SYSCTL_ONE, |
1276 | }, |
1277 | }; |
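
/*
 * This is exposed as /proc/sys/kernel/perf_user_access; clearing it revokes
 * user-space counter access on all CPUs via the IPI above.
 */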
1278 | |
1279 | static void armv8_pmu_register_sysctl_table(void) |
1280 | { |
1281 | static u32 tbl_registered = 0; |
1282 | |
1283 | if (!cmpxchg_relaxed(&tbl_registered, 0, 1)) |
1284 | register_sysctl("kernel" , armv8_pmu_sysctl_table); |
1285 | } |
1286 | |
1287 | static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, |
1288 | int (*map_event)(struct perf_event *event)) |
1289 | { |
1290 | int ret = armv8pmu_probe_pmu(cpu_pmu); |
1291 | if (ret) |
1292 | return ret; |
1293 | |
1294 | cpu_pmu->handle_irq = armv8pmu_handle_irq; |
1295 | cpu_pmu->enable = armv8pmu_enable_event; |
1296 | cpu_pmu->disable = armv8pmu_disable_event; |
1297 | cpu_pmu->read_counter = armv8pmu_read_counter; |
1298 | cpu_pmu->write_counter = armv8pmu_write_counter; |
1299 | cpu_pmu->get_event_idx = armv8pmu_get_event_idx; |
1300 | cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx; |
1301 | cpu_pmu->start = armv8pmu_start; |
1302 | cpu_pmu->stop = armv8pmu_stop; |
1303 | cpu_pmu->reset = armv8pmu_reset; |
1304 | cpu_pmu->set_event_filter = armv8pmu_set_event_filter; |
1305 | |
1306 | cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; |
1307 | |
1308 | cpu_pmu->name = name; |
1309 | cpu_pmu->map_event = map_event; |
1310 | cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; |
1311 | cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; |
1312 | cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = &armv8_pmuv3_caps_attr_group; |
1313 | armv8_pmu_register_sysctl_table(); |
1314 | return 0; |
1315 | } |
1316 | |
1317 | #define PMUV3_INIT_SIMPLE(name) \ |
1318 | static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ |
1319 | { \ |
1320 | return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event); \ |
1321 | } |
1322 | |
1323 | #define PMUV3_INIT_MAP_EVENT(name, map_event) \ |
1324 | static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ |
1325 | { \ |
1326 | return armv8_pmu_init(cpu_pmu, #name, map_event); \ |
1327 | } |
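
/*
 * Each invocation below generates a <name>_pmu_init() function; these are
 * what the of_device_id table and the ACPI probe path at the end of this
 * file hook up to the matching CPUs.
 */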
1328 | |
1329 | PMUV3_INIT_SIMPLE(armv8_pmuv3) |
1330 | |
1331 | PMUV3_INIT_SIMPLE(armv8_cortex_a34) |
1332 | PMUV3_INIT_SIMPLE(armv8_cortex_a55) |
1333 | PMUV3_INIT_SIMPLE(armv8_cortex_a65) |
1334 | PMUV3_INIT_SIMPLE(armv8_cortex_a75) |
1335 | PMUV3_INIT_SIMPLE(armv8_cortex_a76) |
1336 | PMUV3_INIT_SIMPLE(armv8_cortex_a77) |
1337 | PMUV3_INIT_SIMPLE(armv8_cortex_a78) |
1338 | PMUV3_INIT_SIMPLE(armv9_cortex_a510) |
1339 | PMUV3_INIT_SIMPLE(armv9_cortex_a520) |
1340 | PMUV3_INIT_SIMPLE(armv9_cortex_a710) |
1341 | PMUV3_INIT_SIMPLE(armv9_cortex_a715) |
1342 | PMUV3_INIT_SIMPLE(armv9_cortex_a720) |
1343 | PMUV3_INIT_SIMPLE(armv8_cortex_x1) |
1344 | PMUV3_INIT_SIMPLE(armv9_cortex_x2) |
1345 | PMUV3_INIT_SIMPLE(armv9_cortex_x3) |
1346 | PMUV3_INIT_SIMPLE(armv9_cortex_x4) |
1347 | PMUV3_INIT_SIMPLE(armv8_neoverse_e1) |
1348 | PMUV3_INIT_SIMPLE(armv8_neoverse_n1) |
1349 | PMUV3_INIT_SIMPLE(armv9_neoverse_n2) |
1350 | PMUV3_INIT_SIMPLE(armv8_neoverse_v1) |
1351 | |
1352 | PMUV3_INIT_SIMPLE(armv8_nvidia_carmel) |
1353 | PMUV3_INIT_SIMPLE(armv8_nvidia_denver) |
1354 | |
1355 | PMUV3_INIT_MAP_EVENT(armv8_cortex_a35, armv8_a53_map_event) |
1356 | PMUV3_INIT_MAP_EVENT(armv8_cortex_a53, armv8_a53_map_event) |
1357 | PMUV3_INIT_MAP_EVENT(armv8_cortex_a57, armv8_a57_map_event) |
1358 | PMUV3_INIT_MAP_EVENT(armv8_cortex_a72, armv8_a57_map_event) |
1359 | PMUV3_INIT_MAP_EVENT(armv8_cortex_a73, armv8_a73_map_event) |
1360 | PMUV3_INIT_MAP_EVENT(armv8_cavium_thunder, armv8_thunder_map_event) |
1361 | PMUV3_INIT_MAP_EVENT(armv8_brcm_vulcan, armv8_vulcan_map_event) |
1362 | |
1363 | static const struct of_device_id armv8_pmu_of_device_ids[] = { |
1364 | {.compatible = "arm,armv8-pmuv3" , .data = armv8_pmuv3_pmu_init}, |
1365 | {.compatible = "arm,cortex-a34-pmu" , .data = armv8_cortex_a34_pmu_init}, |
1366 | {.compatible = "arm,cortex-a35-pmu" , .data = armv8_cortex_a35_pmu_init}, |
1367 | {.compatible = "arm,cortex-a53-pmu" , .data = armv8_cortex_a53_pmu_init}, |
1368 | {.compatible = "arm,cortex-a55-pmu" , .data = armv8_cortex_a55_pmu_init}, |
1369 | {.compatible = "arm,cortex-a57-pmu" , .data = armv8_cortex_a57_pmu_init}, |
1370 | {.compatible = "arm,cortex-a65-pmu" , .data = armv8_cortex_a65_pmu_init}, |
1371 | {.compatible = "arm,cortex-a72-pmu" , .data = armv8_cortex_a72_pmu_init}, |
1372 | {.compatible = "arm,cortex-a73-pmu" , .data = armv8_cortex_a73_pmu_init}, |
1373 | {.compatible = "arm,cortex-a75-pmu" , .data = armv8_cortex_a75_pmu_init}, |
1374 | {.compatible = "arm,cortex-a76-pmu" , .data = armv8_cortex_a76_pmu_init}, |
1375 | {.compatible = "arm,cortex-a77-pmu" , .data = armv8_cortex_a77_pmu_init}, |
1376 | {.compatible = "arm,cortex-a78-pmu" , .data = armv8_cortex_a78_pmu_init}, |
1377 | {.compatible = "arm,cortex-a510-pmu" , .data = armv9_cortex_a510_pmu_init}, |
1378 | {.compatible = "arm,cortex-a520-pmu" , .data = armv9_cortex_a520_pmu_init}, |
1379 | {.compatible = "arm,cortex-a710-pmu" , .data = armv9_cortex_a710_pmu_init}, |
1380 | {.compatible = "arm,cortex-a715-pmu" , .data = armv9_cortex_a715_pmu_init}, |
1381 | {.compatible = "arm,cortex-a720-pmu" , .data = armv9_cortex_a720_pmu_init}, |
1382 | {.compatible = "arm,cortex-x1-pmu" , .data = armv8_cortex_x1_pmu_init}, |
1383 | {.compatible = "arm,cortex-x2-pmu" , .data = armv9_cortex_x2_pmu_init}, |
1384 | {.compatible = "arm,cortex-x3-pmu" , .data = armv9_cortex_x3_pmu_init}, |
1385 | {.compatible = "arm,cortex-x4-pmu" , .data = armv9_cortex_x4_pmu_init}, |
1386 | {.compatible = "arm,neoverse-e1-pmu" , .data = armv8_neoverse_e1_pmu_init}, |
1387 | {.compatible = "arm,neoverse-n1-pmu" , .data = armv8_neoverse_n1_pmu_init}, |
1388 | {.compatible = "arm,neoverse-n2-pmu" , .data = armv9_neoverse_n2_pmu_init}, |
1389 | {.compatible = "arm,neoverse-v1-pmu" , .data = armv8_neoverse_v1_pmu_init}, |
1390 | {.compatible = "cavium,thunder-pmu" , .data = armv8_cavium_thunder_pmu_init}, |
1391 | {.compatible = "brcm,vulcan-pmu" , .data = armv8_brcm_vulcan_pmu_init}, |
1392 | {.compatible = "nvidia,carmel-pmu" , .data = armv8_nvidia_carmel_pmu_init}, |
1393 | {.compatible = "nvidia,denver-pmu" , .data = armv8_nvidia_denver_pmu_init}, |
1394 | {}, |
1395 | }; |
1396 | |
1397 | static int armv8_pmu_device_probe(struct platform_device *pdev) |
1398 | { |
1399 | return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); |
1400 | } |
1401 | |
1402 | static struct platform_driver armv8_pmu_driver = { |
1403 | .driver = { |
1404 | .name = ARMV8_PMU_PDEV_NAME, |
1405 | .of_match_table = armv8_pmu_of_device_ids, |
1406 | .suppress_bind_attrs = true, |
1407 | }, |
1408 | .probe = armv8_pmu_device_probe, |
1409 | }; |
1410 | |
1411 | static int __init armv8_pmu_driver_init(void) |
1412 | { |
1413 | int ret; |
1414 | |
1415 | if (acpi_disabled) |
1416 | ret = platform_driver_register(&armv8_pmu_driver); |
1417 | else |
1418 | ret = arm_pmu_acpi_probe(armv8_pmuv3_pmu_init); |
1419 | |
1420 | if (!ret) |
1421 | lockup_detector_retry_init(); |
1422 | |
1423 | return ret; |
1424 | } |
1425 | device_initcall(armv8_pmu_driver_init) |
1426 | |
1427 | void arch_perf_update_userpage(struct perf_event *event, |
1428 | struct perf_event_mmap_page *userpg, u64 now) |
1429 | { |
1430 | struct clock_read_data *rd; |
1431 | unsigned int seq; |
1432 | u64 ns; |
1433 | |
1434 | userpg->cap_user_time = 0; |
1435 | userpg->cap_user_time_zero = 0; |
1436 | userpg->cap_user_time_short = 0; |
1437 | userpg->cap_user_rdpmc = armv8pmu_event_has_user_read(event); |
1438 | |
1439 | if (userpg->cap_user_rdpmc) { |
1440 | if (event->hw.flags & ARMPMU_EVT_64BIT) |
1441 | userpg->pmc_width = 64; |
1442 | else |
1443 | userpg->pmc_width = 32; |
1444 | } |
1445 | |
1446 | do { |
1447 | rd = sched_clock_read_begin(&seq); |
1448 | |
1449 | if (rd->read_sched_clock != arch_timer_read_counter) |
1450 | return; |
1451 | |
1452 | userpg->time_mult = rd->mult; |
1453 | userpg->time_shift = rd->shift; |
1454 | userpg->time_zero = rd->epoch_ns; |
1455 | userpg->time_cycles = rd->epoch_cyc; |
1456 | userpg->time_mask = rd->sched_clock_mask; |
1457 | |
1458 | /* |
1459 | * Subtract the cycle base, such that software that |
1460 | * doesn't know about cap_user_time_short still 'works' |
1461 | * assuming no wraps. |
1462 | */ |
		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
1464 | userpg->time_zero -= ns; |
1465 | |
1466 | } while (sched_clock_read_retry(seq)); |
1467 | |
1468 | userpg->time_offset = userpg->time_zero - now; |
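
	/*
	 * With these fields user space converts a raw counter value 'cyc' to
	 * nanoseconds as described in the perf_event_mmap_page documentation:
	 *
	 *   quot  = cyc >> time_shift;
	 *   rem   = cyc & (((u64)1 << time_shift) - 1);
	 *   delta = time_offset + quot * time_mult +
	 *           ((rem * time_mult) >> time_shift);
	 */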
1469 | |
1470 | /* |
1471 | * time_shift is not expected to be greater than 31 due to |
1472 | * the original published conversion algorithm shifting a |
1473 | * 32-bit value (now specifies a 64-bit value) - refer |
1474 | * perf_event_mmap_page documentation in perf_event.h. |
1475 | */ |
1476 | if (userpg->time_shift == 32) { |
1477 | userpg->time_shift = 31; |
1478 | userpg->time_mult >>= 1; |
1479 | } |
1480 | |
1481 | /* |
1482 | * Internal timekeeping for enabled/running/stopped times |
1483 | * is always computed with the sched_clock. |
1484 | */ |
1485 | userpg->cap_user_time = 1; |
1486 | userpg->cap_user_time_zero = 1; |
1487 | userpg->cap_user_time_short = 1; |
1488 | } |
1489 | |