1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Support Intel IOMMU PerfMon |
4 | * Copyright(c) 2023 Intel Corporation. |
5 | */ |
6 | #define pr_fmt(fmt) "DMAR: " fmt |
7 | #define dev_fmt(fmt) pr_fmt(fmt) |
8 | |
9 | #include <linux/dmar.h> |
10 | #include "iommu.h" |
11 | #include "perfmon.h" |
12 | |
PMU_FORMAT_ATTR(event,       "config:0-27");  /* ES: Events Select */
PMU_FORMAT_ATTR(event_group, "config:28-31"); /* EGI: Event Group Index */
15 | |
16 | static struct attribute *iommu_pmu_format_attrs[] = { |
17 | &format_attr_event_group.attr, |
18 | &format_attr_event.attr, |
19 | NULL |
20 | }; |
21 | |
22 | static struct attribute_group iommu_pmu_format_attr_group = { |
23 | .name = "format" , |
24 | .attrs = iommu_pmu_format_attrs, |
25 | }; |
26 | |
27 | /* The available events are added in attr_update later */ |
28 | static struct attribute *attrs_empty[] = { |
29 | NULL |
30 | }; |
31 | |
32 | static struct attribute_group iommu_pmu_events_attr_group = { |
33 | .name = "events" , |
34 | .attrs = attrs_empty, |
35 | }; |
36 | |
37 | static cpumask_t iommu_pmu_cpu_mask; |
38 | |
39 | static ssize_t |
40 | cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) |
41 | { |
	return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
43 | } |
44 | static DEVICE_ATTR_RO(cpumask); |
45 | |
46 | static struct attribute *iommu_pmu_cpumask_attrs[] = { |
47 | &dev_attr_cpumask.attr, |
48 | NULL |
49 | }; |
50 | |
51 | static struct attribute_group iommu_pmu_cpumask_attr_group = { |
52 | .attrs = iommu_pmu_cpumask_attrs, |
53 | }; |
54 | |
55 | static const struct attribute_group *iommu_pmu_attr_groups[] = { |
56 | &iommu_pmu_format_attr_group, |
57 | &iommu_pmu_events_attr_group, |
58 | &iommu_pmu_cpumask_attr_group, |
59 | NULL |
60 | }; |
61 | |
62 | static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev) |
63 | { |
64 | /* |
65 | * The perf_event creates its own dev for each PMU. |
66 | * See pmu_dev_alloc() |
67 | */ |
68 | return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu); |
69 | } |
70 | |
71 | #define IOMMU_PMU_ATTR(_name, _format, _filter) \ |
72 | PMU_FORMAT_ATTR(_name, _format); \ |
73 | \ |
74 | static struct attribute *_name##_attr[] = { \ |
75 | &format_attr_##_name.attr, \ |
76 | NULL \ |
77 | }; \ |
78 | \ |
79 | static umode_t \ |
80 | _name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \ |
81 | { \ |
82 | struct device *dev = kobj_to_dev(kobj); \ |
83 | struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \ |
84 | \ |
85 | if (!iommu_pmu) \ |
86 | return 0; \ |
87 | return (iommu_pmu->filter & _filter) ? attr->mode : 0; \ |
88 | } \ |
89 | \ |
90 | static struct attribute_group _name = { \ |
91 | .name = "format", \ |
92 | .attrs = _name##_attr, \ |
93 | .is_visible = _name##_is_visible, \ |
94 | }; |
95 | |
IOMMU_PMU_ATTR(filter_requester_id_en, "config1:0",     IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain_en,       "config1:1",     IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid_en,        "config1:2",     IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats_en,          "config1:3",     IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table_en,   "config1:4",     IOMMU_PMU_FILTER_PAGE_TABLE);
IOMMU_PMU_ATTR(filter_requester_id,    "config1:16-31", IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain,          "config1:32-47", IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid,           "config2:0-21",  IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats,             "config2:24-28", IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table,      "config2:32-36", IOMMU_PMU_FILTER_PAGE_TABLE);
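
/*
 * A filter is programmed through config1/config2 alongside the event,
 * e.g. to count IOTLB lookups for PASID 2 only (a sketch; the fields
 * follow the format strings above):
 *
 *   perf stat -e dmar0/iotlb_lookup,filter_pasid_en=1,filter_pasid=0x2/ -a sleep 1
 */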
106 | |
107 | #define iommu_pmu_en_requester_id(e) ((e) & 0x1) |
108 | #define iommu_pmu_en_domain(e) (((e) >> 1) & 0x1) |
109 | #define iommu_pmu_en_pasid(e) (((e) >> 2) & 0x1) |
110 | #define iommu_pmu_en_ats(e) (((e) >> 3) & 0x1) |
111 | #define iommu_pmu_en_page_table(e) (((e) >> 4) & 0x1) |
112 | #define iommu_pmu_get_requester_id(filter) (((filter) >> 16) & 0xffff) |
113 | #define iommu_pmu_get_domain(filter) (((filter) >> 32) & 0xffff) |
114 | #define iommu_pmu_get_pasid(filter) ((filter) & 0x3fffff) |
115 | #define iommu_pmu_get_ats(filter) (((filter) >> 24) & 0x1f) |
116 | #define iommu_pmu_get_page_table(filter) (((filter) >> 32) & 0x1f) |
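
/*
 * The bit positions above mirror the config1/config2 format strings
 * defined earlier, e.g. filter_requester_id occupies config1:16-31 and
 * is extracted by iommu_pmu_get_requester_id().
 */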
117 | |
118 | #define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig) \ |
119 | { \ |
120 | if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) { \ |
121 | dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \ |
122 | IOMMU_PMU_CFG_SIZE + \ |
123 | (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \ |
124 | iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN);\ |
125 | } \ |
126 | } |
127 | |
128 | #define iommu_pmu_clear_filter(_filter, _idx) \ |
129 | { \ |
130 | if (iommu_pmu->filter & _filter) { \ |
131 | dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \ |
132 | IOMMU_PMU_CFG_SIZE + \ |
133 | (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \ |
134 | 0); \ |
135 | } \ |
136 | } |
137 | |
138 | /* |
139 | * Define the event attr related functions |
140 | * Input: _name: event attr name |
141 | * _string: string of the event in sysfs |
142 | * _g_idx: event group encoding |
143 | * _event: event encoding |
144 | */ |
145 | #define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event) \ |
146 | PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string) \ |
147 | \ |
148 | static struct attribute *_name##_attr[] = { \ |
149 | &event_attr_##_name.attr.attr, \ |
150 | NULL \ |
151 | }; \ |
152 | \ |
153 | static umode_t \ |
154 | _name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \ |
155 | { \ |
156 | struct device *dev = kobj_to_dev(kobj); \ |
157 | struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \ |
158 | \ |
159 | if (!iommu_pmu) \ |
160 | return 0; \ |
161 | return (iommu_pmu->evcap[_g_idx] & _event) ? attr->mode : 0; \ |
162 | } \ |
163 | \ |
164 | static struct attribute_group _name = { \ |
165 | .name = "events", \ |
166 | .attrs = _name##_attr, \ |
167 | .is_visible = _name##_is_visible, \ |
168 | }; |
169 | |
IOMMU_PMU_EVENT_ATTR(iommu_clocks,            "event_group=0x0,event=0x001", 0x0, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_requests,          "event_group=0x0,event=0x002", 0x0, 0x002)
IOMMU_PMU_EVENT_ATTR(pw_occupancy,            "event_group=0x0,event=0x004", 0x0, 0x004)
IOMMU_PMU_EVENT_ATTR(ats_blocked,             "event_group=0x0,event=0x008", 0x0, 0x008)
IOMMU_PMU_EVENT_ATTR(iommu_mrds,              "event_group=0x1,event=0x001", 0x1, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked,       "event_group=0x1,event=0x020", 0x1, 0x020)
IOMMU_PMU_EVENT_ATTR(pg_req_posted,           "event_group=0x1,event=0x040", 0x1, 0x040)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup,       "event_group=0x2,event=0x001", 0x2, 0x001)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit,          "event_group=0x2,event=0x002", 0x2, 0x002)
IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup,      "event_group=0x2,event=0x004", 0x2, 0x004)
IOMMU_PMU_EVENT_ATTR(pasid_cache_hit,         "event_group=0x2,event=0x008", 0x2, 0x008)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup,       "event_group=0x2,event=0x010", 0x2, 0x010)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit,          "event_group=0x2,event=0x020", 0x2, 0x020)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup,       "event_group=0x2,event=0x040", 0x2, 0x040)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit,          "event_group=0x2,event=0x080", 0x2, 0x080)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup,      "event_group=0x2,event=0x100", 0x2, 0x100)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit,         "event_group=0x2,event=0x200", 0x2, 0x200)
IOMMU_PMU_EVENT_ATTR(iotlb_lookup,            "event_group=0x3,event=0x001", 0x3, 0x001)
IOMMU_PMU_EVENT_ATTR(iotlb_hit,               "event_group=0x3,event=0x002", 0x3, 0x002)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup,         "event_group=0x3,event=0x004", 0x3, 0x004)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit,            "event_group=0x3,event=0x008", 0x3, 0x008)
IOMMU_PMU_EVENT_ATTR(int_cache_lookup,        "event_group=0x4,event=0x001", 0x4, 0x001)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted, "event_group=0x4,event=0x002", 0x4, 0x002)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted,    "event_group=0x4,event=0x004", 0x4, 0x004)
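
/*
 * Events which the hardware advertises become visible under
 * /sys/bus/event_source/devices/dmar<N>/events/ and can be used by
 * name, e.g.:
 *
 *   perf stat -e dmar0/iommu_requests/ -a sleep 1
 */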
194 | |
195 | static const struct attribute_group *iommu_pmu_attr_update[] = { |
196 | &filter_requester_id_en, |
197 | &filter_domain_en, |
198 | &filter_pasid_en, |
199 | &filter_ats_en, |
200 | &filter_page_table_en, |
201 | &filter_requester_id, |
202 | &filter_domain, |
203 | &filter_pasid, |
204 | &filter_ats, |
205 | &filter_page_table, |
206 | &iommu_clocks, |
207 | &iommu_requests, |
208 | &pw_occupancy, |
209 | &ats_blocked, |
210 | &iommu_mrds, |
211 | &iommu_mem_blocked, |
212 | &pg_req_posted, |
213 | &ctxt_cache_lookup, |
214 | &ctxt_cache_hit, |
215 | &pasid_cache_lookup, |
216 | &pasid_cache_hit, |
217 | &ss_nonleaf_lookup, |
218 | &ss_nonleaf_hit, |
219 | &fs_nonleaf_lookup, |
220 | &fs_nonleaf_hit, |
221 | &hpt_nonleaf_lookup, |
222 | &hpt_nonleaf_hit, |
223 | &iotlb_lookup, |
224 | &iotlb_hit, |
225 | &hpt_leaf_lookup, |
226 | &hpt_leaf_hit, |
227 | &int_cache_lookup, |
228 | &int_cache_hit_nonposted, |
229 | &int_cache_hit_posted, |
230 | NULL |
231 | }; |
232 | |
233 | static inline void __iomem * |
234 | iommu_event_base(struct iommu_pmu *iommu_pmu, int idx) |
235 | { |
236 | return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride; |
237 | } |
238 | |
239 | static inline void __iomem * |
240 | iommu_config_base(struct iommu_pmu *iommu_pmu, int idx) |
241 | { |
242 | return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET; |
243 | } |
244 | |
245 | static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event) |
246 | { |
247 | return container_of(event->pmu, struct iommu_pmu, pmu); |
248 | } |
249 | |
/*
 * Build the value programmed into the counter configuration register:
 * the event select (ES) and event group index (EGI) from the user's
 * attr.config, plus the interrupt-on-overflow enable bit.
 */
static inline u64 iommu_event_config(struct perf_event *event)
251 | { |
252 | u64 config = event->attr.config; |
253 | |
254 | return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) | |
255 | (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) | |
256 | IOMMU_EVENT_CFG_INT; |
257 | } |
258 | |
259 | static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu, |
260 | struct perf_event *event) |
261 | { |
262 | return event->pmu == &iommu_pmu->pmu; |
263 | } |
264 | |
265 | static int iommu_pmu_validate_event(struct perf_event *event) |
266 | { |
267 | struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
268 | u32 event_group = iommu_event_group(event->attr.config); |
269 | |
270 | if (event_group >= iommu_pmu->num_eg) |
271 | return -EINVAL; |
272 | |
273 | return 0; |
274 | } |
275 | |
276 | static int iommu_pmu_validate_group(struct perf_event *event) |
277 | { |
278 | struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
279 | struct perf_event *sibling; |
280 | int nr = 0; |
281 | |
282 | /* |
283 | * All events in a group must be scheduled simultaneously. |
284 | * Check whether there is enough counters for all the events. |
285 | */ |
286 | for_each_sibling_event(sibling, event->group_leader) { |
		if (!is_iommu_pmu_event(iommu_pmu, sibling) ||
288 | sibling->state <= PERF_EVENT_STATE_OFF) |
289 | continue; |
290 | |
291 | if (++nr > iommu_pmu->num_cntr) |
292 | return -EINVAL; |
293 | } |
294 | |
295 | return 0; |
296 | } |
297 | |
298 | static int iommu_pmu_event_init(struct perf_event *event) |
299 | { |
300 | struct hw_perf_event *hwc = &event->hw; |
301 | |
302 | if (event->attr.type != event->pmu->type) |
303 | return -ENOENT; |
304 | |
305 | /* sampling not supported */ |
306 | if (event->attr.sample_period) |
307 | return -EINVAL; |
308 | |
309 | if (event->cpu < 0) |
310 | return -EINVAL; |
311 | |
312 | if (iommu_pmu_validate_event(event)) |
313 | return -EINVAL; |
314 | |
315 | hwc->config = iommu_event_config(event); |
316 | |
317 | return iommu_pmu_validate_group(event); |
318 | } |
319 | |
320 | static void iommu_pmu_event_update(struct perf_event *event) |
321 | { |
322 | struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
323 | struct hw_perf_event *hwc = &event->hw; |
324 | u64 prev_count, new_count, delta; |
325 | int shift = 64 - iommu_pmu->cntr_width; |
326 | |
327 | again: |
328 | prev_count = local64_read(&hwc->prev_count); |
329 | new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); |
330 | if (local64_xchg(&hwc->prev_count, new_count) != prev_count) |
331 | goto again; |
332 | |
333 | /* |
334 | * The counter width is enumerated. Always shift the counter |
335 | * before using it. |
336 | */ |
337 | delta = (new_count << shift) - (prev_count << shift); |
338 | delta >>= shift; |
339 | |
340 | local64_add(delta, &event->count); |
341 | } |
342 | |
343 | static void iommu_pmu_start(struct perf_event *event, int flags) |
344 | { |
345 | struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
346 | struct intel_iommu *iommu = iommu_pmu->iommu; |
347 | struct hw_perf_event *hwc = &event->hw; |
348 | u64 count; |
349 | |
350 | if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) |
351 | return; |
352 | |
353 | if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX)) |
354 | return; |
355 | |
356 | if (flags & PERF_EF_RELOAD) |
357 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); |
358 | |
359 | hwc->state = 0; |
360 | |
361 | /* Always reprogram the period */ |
362 | count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); |
	local64_set(&hwc->prev_count, count);
364 | |
365 | /* |
366 | * The error of ecmd will be ignored. |
367 | * - The existing perf_event subsystem doesn't handle the error. |
368 | * Only IOMMU PMU returns runtime HW error. We don't want to |
369 | * change the existing generic interfaces for the specific case. |
370 | * - It's a corner case caused by HW, which is very unlikely to |
371 | * happen. There is nothing SW can do. |
372 | * - The worst case is that the user will get <not count> with |
373 | * perf command, which can give the user some hints. |
374 | */ |
375 | ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, oa: hwc->idx, ob: 0); |
376 | |
377 | perf_event_update_userpage(event); |
378 | } |
379 | |
380 | static void iommu_pmu_stop(struct perf_event *event, int flags) |
381 | { |
382 | struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
383 | struct intel_iommu *iommu = iommu_pmu->iommu; |
384 | struct hw_perf_event *hwc = &event->hw; |
385 | |
386 | if (!(hwc->state & PERF_HES_STOPPED)) { |
		ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0);
388 | |
389 | iommu_pmu_event_update(event); |
390 | |
391 | hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; |
392 | } |
393 | } |
394 | |
395 | static inline int |
396 | iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu, |
397 | int idx, struct perf_event *event) |
398 | { |
399 | u32 event_group = iommu_event_group(event->attr.config); |
400 | u32 select = iommu_event_select(event->attr.config); |
401 | |
402 | if (!(iommu_pmu->cntr_evcap[idx][event_group] & select)) |
403 | return -EINVAL; |
404 | |
405 | return 0; |
406 | } |
407 | |
408 | static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu, |
409 | struct perf_event *event) |
410 | { |
411 | struct hw_perf_event *hwc = &event->hw; |
412 | int idx; |
413 | |
414 | /* |
415 | * The counters which support limited events are usually at the end. |
416 | * Schedule them first to accommodate more events. |
417 | */ |
418 | for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) { |
		if (test_and_set_bit(idx, iommu_pmu->used_mask))
420 | continue; |
421 | /* Check per-counter event capabilities */ |
422 | if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event)) |
423 | break; |
		clear_bit(idx, iommu_pmu->used_mask);
425 | } |
426 | if (idx < 0) |
427 | return -EINVAL; |
428 | |
429 | iommu_pmu->event_list[idx] = event; |
430 | hwc->idx = idx; |
431 | |
432 | /* config events */ |
433 | dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config); |
434 | |
435 | iommu_pmu_set_filter(requester_id, event->attr.config1, |
436 | IOMMU_PMU_FILTER_REQUESTER_ID, idx, |
437 | event->attr.config1); |
438 | iommu_pmu_set_filter(domain, event->attr.config1, |
439 | IOMMU_PMU_FILTER_DOMAIN, idx, |
440 | event->attr.config1); |
441 | iommu_pmu_set_filter(pasid, event->attr.config2, |
442 | IOMMU_PMU_FILTER_PASID, idx, |
443 | event->attr.config1); |
444 | iommu_pmu_set_filter(ats, event->attr.config2, |
445 | IOMMU_PMU_FILTER_ATS, idx, |
446 | event->attr.config1); |
447 | iommu_pmu_set_filter(page_table, event->attr.config2, |
448 | IOMMU_PMU_FILTER_PAGE_TABLE, idx, |
449 | event->attr.config1); |
450 | |
451 | return 0; |
452 | } |
453 | |
454 | static int iommu_pmu_add(struct perf_event *event, int flags) |
455 | { |
456 | struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
457 | struct hw_perf_event *hwc = &event->hw; |
458 | int ret; |
459 | |
460 | ret = iommu_pmu_assign_event(iommu_pmu, event); |
461 | if (ret < 0) |
462 | return ret; |
463 | |
464 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; |
465 | |
466 | if (flags & PERF_EF_START) |
		iommu_pmu_start(event, 0);
468 | |
469 | return 0; |
470 | } |
471 | |
472 | static void iommu_pmu_del(struct perf_event *event, int flags) |
473 | { |
474 | struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
475 | int idx = event->hw.idx; |
476 | |
477 | iommu_pmu_stop(event, PERF_EF_UPDATE); |
478 | |
479 | iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx); |
480 | iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx); |
481 | iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx); |
482 | iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx); |
483 | iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx); |
484 | |
485 | iommu_pmu->event_list[idx] = NULL; |
486 | event->hw.idx = -1; |
	clear_bit(idx, iommu_pmu->used_mask);
488 | |
489 | perf_event_update_userpage(event); |
490 | } |
491 | |
/*
 * The perf core brackets counter scheduling with pmu_disable()/
 * pmu_enable(); FREEZE/UNFREEZE stop and restart all counters of this
 * IOMMU at once through the enhanced command interface.
 */
static void iommu_pmu_enable(struct pmu *pmu)
493 | { |
494 | struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); |
495 | struct intel_iommu *iommu = iommu_pmu->iommu; |
496 | |
	ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0);
498 | } |
499 | |
500 | static void iommu_pmu_disable(struct pmu *pmu) |
501 | { |
502 | struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); |
503 | struct intel_iommu *iommu = iommu_pmu->iommu; |
504 | |
	ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0);
506 | } |
507 | |
508 | static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu) |
509 | { |
510 | struct perf_event *event; |
511 | u64 status; |
512 | int i; |
513 | |
514 | /* |
515 | * Two counters may be overflowed very close. Always check |
516 | * whether there are more to handle. |
517 | */ |
518 | while ((status = dmar_readq(iommu_pmu->overflow))) { |
519 | for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) { |
520 | /* |
521 | * Find the assigned event of the counter. |
522 | * Accumulate the value into the event->count. |
523 | */ |
524 | event = iommu_pmu->event_list[i]; |
525 | if (!event) { |
				pr_warn_once("Cannot find the assigned event for counter %d\n", i);
527 | continue; |
528 | } |
529 | iommu_pmu_event_update(event); |
530 | } |
531 | |
		/* Writing the handled bits back clears them */
		dmar_writeq(iommu_pmu->overflow, status);
533 | } |
534 | } |
535 | |
536 | static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id) |
537 | { |
538 | struct intel_iommu *iommu = dev_id; |
539 | |
540 | if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG)) |
541 | return IRQ_NONE; |
542 | |
	iommu_pmu_counter_overflow(iommu->pmu);
544 | |
545 | /* Clear the status bit */ |
546 | dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS); |
547 | |
548 | return IRQ_HANDLED; |
549 | } |
550 | |
551 | static int __iommu_pmu_register(struct intel_iommu *iommu) |
552 | { |
553 | struct iommu_pmu *iommu_pmu = iommu->pmu; |
554 | |
555 | iommu_pmu->pmu.name = iommu->name; |
556 | iommu_pmu->pmu.task_ctx_nr = perf_invalid_context; |
557 | iommu_pmu->pmu.event_init = iommu_pmu_event_init; |
558 | iommu_pmu->pmu.pmu_enable = iommu_pmu_enable; |
559 | iommu_pmu->pmu.pmu_disable = iommu_pmu_disable; |
560 | iommu_pmu->pmu.add = iommu_pmu_add; |
561 | iommu_pmu->pmu.del = iommu_pmu_del; |
562 | iommu_pmu->pmu.start = iommu_pmu_start; |
563 | iommu_pmu->pmu.stop = iommu_pmu_stop; |
564 | iommu_pmu->pmu.read = iommu_pmu_event_update; |
565 | iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups; |
566 | iommu_pmu->pmu.attr_update = iommu_pmu_attr_update; |
567 | iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; |
568 | iommu_pmu->pmu.module = THIS_MODULE; |
569 | |
	return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
571 | } |
572 | |
/*
 * The DMAR_PERF*OFF_REG registers hold the offset of the corresponding
 * perfmon region relative to the IOMMU register base.
 */
static inline void __iomem *
get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
575 | { |
576 | u32 off = dmar_readl(iommu->reg + offset); |
577 | |
578 | return iommu->reg + off; |
579 | } |
580 | |
581 | int alloc_iommu_pmu(struct intel_iommu *iommu) |
582 | { |
583 | struct iommu_pmu *iommu_pmu; |
584 | int i, j, ret; |
585 | u64 perfcap; |
586 | u32 cap; |
587 | |
588 | if (!ecap_pms(iommu->ecap)) |
589 | return 0; |
590 | |
	/* The IOMMU PMU requires ECMD support as well */
592 | if (!cap_ecmds(iommu->cap)) |
593 | return -ENODEV; |
594 | |
595 | perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG); |
	/* Performance monitoring is not supported. */
597 | if (!perfcap) |
598 | return -ENODEV; |
599 | |
	/* Sanity-check the number of counters and event groups */
601 | if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap)) |
602 | return -ENODEV; |
603 | |
604 | /* The interrupt on overflow is required */ |
605 | if (!pcap_interrupt(perfcap)) |
606 | return -ENODEV; |
607 | |
608 | /* Check required Enhanced Command Capability */ |
609 | if (!ecmd_has_pmu_essential(iommu)) |
610 | return -ENODEV; |
611 | |
	iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL);
613 | if (!iommu_pmu) |
614 | return -ENOMEM; |
615 | |
616 | iommu_pmu->num_cntr = pcap_num_cntr(perfcap); |
617 | if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) { |
		pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!\n",
			     iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX);
620 | iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX; |
621 | } |
622 | |
623 | iommu_pmu->cntr_width = pcap_cntr_width(perfcap); |
624 | iommu_pmu->filter = pcap_filters_mask(perfcap); |
625 | iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap); |
626 | iommu_pmu->num_eg = pcap_num_event_group(perfcap); |
627 | |
	iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL);
629 | if (!iommu_pmu->evcap) { |
630 | ret = -ENOMEM; |
631 | goto free_pmu; |
632 | } |
633 | |
634 | /* Parse event group capabilities */ |
635 | for (i = 0; i < iommu_pmu->num_eg; i++) { |
636 | u64 pcap; |
637 | |
638 | pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG + |
639 | i * IOMMU_PMU_CAP_REGS_STEP); |
640 | iommu_pmu->evcap[i] = pecap_es(pcap); |
641 | } |
642 | |
	iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL);
644 | if (!iommu_pmu->cntr_evcap) { |
645 | ret = -ENOMEM; |
646 | goto free_pmu_evcap; |
647 | } |
648 | for (i = 0; i < iommu_pmu->num_cntr; i++) { |
		iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL);
650 | if (!iommu_pmu->cntr_evcap[i]) { |
651 | ret = -ENOMEM; |
652 | goto free_pmu_cntr_evcap; |
653 | } |
654 | /* |
655 | * Set to the global capabilities, will adjust according |
656 | * to per-counter capabilities later. |
657 | */ |
658 | for (j = 0; j < iommu_pmu->num_eg; j++) |
659 | iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j]; |
660 | } |
661 | |
662 | iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG); |
663 | iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG); |
664 | iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG); |
665 | |
666 | /* |
667 | * Check per-counter capabilities. All counters should have the |
668 | * same capabilities on Interrupt on Overflow Support and Counter |
669 | * Width. |
670 | */ |
671 | for (i = 0; i < iommu_pmu->num_cntr; i++) { |
672 | cap = dmar_readl(iommu_pmu->cfg_reg + |
673 | i * IOMMU_PMU_CFG_OFFSET + |
674 | IOMMU_PMU_CFG_CNTRCAP_OFFSET); |
675 | if (!iommu_cntrcap_pcc(cap)) |
676 | continue; |
677 | |
678 | /* |
679 | * It's possible that some counters have a different |
680 | * capability because of e.g., HW bug. Check the corner |
681 | * case here and simply drop those counters. |
682 | */ |
683 | if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) || |
684 | !iommu_cntrcap_ios(cap)) { |
685 | iommu_pmu->num_cntr = i; |
686 | pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n" , |
687 | iommu_pmu->num_cntr); |
688 | } |
689 | |
690 | /* Clear the pre-defined events group */ |
691 | for (j = 0; j < iommu_pmu->num_eg; j++) |
692 | iommu_pmu->cntr_evcap[i][j] = 0; |
693 | |
694 | /* Override with per-counter event capabilities */ |
695 | for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) { |
696 | cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET + |
697 | IOMMU_PMU_CFG_CNTREVCAP_OFFSET + |
698 | (j * IOMMU_PMU_OFF_REGS_STEP)); |
699 | iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap); |
700 | /* |
701 | * Some events may only be supported by a specific counter. |
702 | * Track them in the evcap as well. |
703 | */ |
704 | iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap); |
705 | } |
706 | } |
707 | |
708 | iommu_pmu->iommu = iommu; |
709 | iommu->pmu = iommu_pmu; |
710 | |
711 | return 0; |
712 | |
free_pmu_cntr_evcap:
	for (i = 0; i < iommu_pmu->num_cntr; i++)
		kfree(iommu_pmu->cntr_evcap[i]);
	kfree(iommu_pmu->cntr_evcap);
free_pmu_evcap:
	kfree(iommu_pmu->evcap);
free_pmu:
	kfree(iommu_pmu);
721 | |
722 | return ret; |
723 | } |
724 | |
725 | void free_iommu_pmu(struct intel_iommu *iommu) |
726 | { |
727 | struct iommu_pmu *iommu_pmu = iommu->pmu; |
728 | |
729 | if (!iommu_pmu) |
730 | return; |
731 | |
	if (iommu_pmu->evcap) {
		int i;

		for (i = 0; i < iommu_pmu->num_cntr; i++)
			kfree(iommu_pmu->cntr_evcap[i]);
		kfree(iommu_pmu->cntr_evcap);
	}
	kfree(iommu_pmu->evcap);
	kfree(iommu_pmu);
741 | iommu->pmu = NULL; |
742 | } |
743 | |
744 | static int iommu_pmu_set_interrupt(struct intel_iommu *iommu) |
745 | { |
746 | struct iommu_pmu *iommu_pmu = iommu->pmu; |
747 | int irq, ret; |
748 | |
	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu);
750 | if (irq <= 0) |
751 | return -EINVAL; |
752 | |
	snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id);
754 | |
755 | iommu->perf_irq = irq; |
	ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler,
				   IRQF_ONESHOT, iommu_pmu->irq_name, iommu);
758 | if (ret) { |
759 | dmar_free_hwirq(irq); |
760 | iommu->perf_irq = 0; |
761 | return ret; |
762 | } |
763 | return 0; |
764 | } |
765 | |
766 | static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu) |
767 | { |
768 | if (!iommu->perf_irq) |
769 | return; |
770 | |
771 | free_irq(iommu->perf_irq, iommu); |
	dmar_free_hwirq(iommu->perf_irq);
773 | iommu->perf_irq = 0; |
774 | } |
775 | |
776 | static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) |
777 | { |
778 | struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); |
779 | |
	if (cpumask_empty(&iommu_pmu_cpu_mask))
		cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);

	if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
		iommu_pmu->cpu = cpu;
785 | |
786 | return 0; |
787 | } |
788 | |
789 | static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) |
790 | { |
791 | struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); |
	int target = cpumask_first(&iommu_pmu_cpu_mask);
793 | |
794 | /* |
795 | * The iommu_pmu_cpu_mask has been updated when offline the CPU |
796 | * for the first iommu_pmu. Migrate the other iommu_pmu to the |
797 | * new target. |
798 | */ |
799 | if (target < nr_cpu_ids && target != iommu_pmu->cpu) { |
800 | perf_pmu_migrate_context(pmu: &iommu_pmu->pmu, src_cpu: cpu, dst_cpu: target); |
801 | iommu_pmu->cpu = target; |
802 | return 0; |
803 | } |
804 | |
	if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
806 | return 0; |
807 | |
808 | target = cpumask_any_but(cpu_online_mask, cpu); |
809 | |
	if (target < nr_cpu_ids)
		cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
	else
		return 0;

	perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
	iommu_pmu->cpu = target;
817 | |
818 | return 0; |
819 | } |
820 | |
821 | static int nr_iommu_pmu; |
822 | static enum cpuhp_state iommu_cpuhp_slot; |
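
/*
 * All IOMMU PMUs share a single dynamic cpuhp state. nr_iommu_pmu
 * reference-counts it: the state is set up with the first PMU and
 * removed again with the last.
 */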
823 | |
824 | static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu) |
825 | { |
826 | int ret; |
827 | |
828 | if (!nr_iommu_pmu) { |
		ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
					      "driver/iommu/intel/perfmon:online",
					      iommu_pmu_cpu_online,
					      iommu_pmu_cpu_offline);
833 | if (ret < 0) |
834 | return ret; |
835 | iommu_cpuhp_slot = ret; |
836 | } |
837 | |
	ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
839 | if (ret) { |
840 | if (!nr_iommu_pmu) |
			cpuhp_remove_multi_state(iommu_cpuhp_slot);
842 | return ret; |
843 | } |
844 | nr_iommu_pmu++; |
845 | |
846 | return 0; |
847 | } |
848 | |
849 | static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu) |
850 | { |
	cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
852 | |
853 | if (--nr_iommu_pmu) |
854 | return; |
855 | |
	cpuhp_remove_multi_state(iommu_cpuhp_slot);
857 | } |
858 | |
859 | void iommu_pmu_register(struct intel_iommu *iommu) |
860 | { |
861 | struct iommu_pmu *iommu_pmu = iommu->pmu; |
862 | |
863 | if (!iommu_pmu) |
864 | return; |
865 | |
866 | if (__iommu_pmu_register(iommu)) |
867 | goto err; |
868 | |
869 | if (iommu_pmu_cpuhp_setup(iommu_pmu)) |
870 | goto unregister; |
871 | |
872 | /* Set interrupt for overflow */ |
873 | if (iommu_pmu_set_interrupt(iommu)) |
874 | goto cpuhp_free; |
875 | |
876 | return; |
877 | |
878 | cpuhp_free: |
879 | iommu_pmu_cpuhp_free(iommu_pmu); |
880 | unregister: |
	perf_pmu_unregister(&iommu_pmu->pmu);
882 | err: |
	pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
884 | free_iommu_pmu(iommu); |
885 | } |
886 | |
887 | void iommu_pmu_unregister(struct intel_iommu *iommu) |
888 | { |
889 | struct iommu_pmu *iommu_pmu = iommu->pmu; |
890 | |
891 | if (!iommu_pmu) |
892 | return; |
893 | |
894 | iommu_pmu_unset_interrupt(iommu); |
895 | iommu_pmu_cpuhp_free(iommu_pmu); |
	perf_pmu_unregister(&iommu_pmu->pmu);
897 | } |
898 | |