/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"

#include "i915_drv.h"
#include "i915_pmu.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

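/* Engine sample types which can be serviced by the sampling timer. */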
#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

static cpumask_t i915_pmu_cpumask;
static unsigned int i915_pmu_target_cpu = -1;

static struct i915_pmu *event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct i915_pmu, base);
}

static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu)
{
	return container_of(pmu, struct drm_i915_private, pmu);
}

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(const u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_gt_id(const u64 config)
{
	return config >> __I915_PMU_GT_SHIFT;
}

static u64 config_counter(const u64 config)
{
	return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
}

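/*
 * Map a non-engine event onto its bit in the pmu->enable bitmask: the
 * tracked events are packed after the engine sample bits, with one block
 * of __I915_PMU_TRACKED_EVENT_COUNT bits per GT. Events which need no
 * enable/disable state tracking map to -1.
 */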
static unsigned int other_bit(const u64 config)
{
	unsigned int val;

	switch (config_counter(config)) {
	case I915_PMU_ACTUAL_FREQUENCY:
		val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
		break;
	case I915_PMU_REQUESTED_FREQUENCY:
		val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
		break;
	case I915_PMU_RC6_RESIDENCY:
		val = __I915_PMU_RC6_RESIDENCY_ENABLED;
		break;
	default:
		/*
		 * Events that do not require sampling, or tracking state
		 * transitions between enabled and disabled, can be ignored.
		 */
		return -1;
	}

	return I915_ENGINE_SAMPLE_COUNT +
	       config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
	       val;
}

static unsigned int config_bit(const u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return other_bit(config);
}

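/*
 * Convert an event config to a mask for the pmu->enable bitmask, sanity
 * checking (at build time for constant configs, at runtime otherwise)
 * that the bit fits.
 */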
static u32 config_mask(const u64 config)
{
	unsigned int bit = config_bit(config);

	if (__builtin_constant_p(config))
		BUILD_BUG_ON(bit >
			     BITS_PER_TYPE(typeof_member(struct i915_pmu,
							  enable)) - 1);
	else
		WARN_ON_ONCE(bit >
			     BITS_PER_TYPE(typeof_member(struct i915_pmu,
							  enable)) - 1);

	return BIT(config_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_bit(struct perf_event *event)
{
	return config_bit(event->attr.config);
}

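/* Mask covering the actual and requested frequency events on all GTs. */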
static u32 frequency_enabled_mask(void)
{
	unsigned int i;
	u32 mask = 0;

	for (i = 0; i < I915_PMU_MAX_GT; i++)
		mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
			config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));

	return mask;
}

static bool pmu_needs_timer(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	u32 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = pmu->enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;

	/*
	 * Also, if software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 */
	if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

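/*
 * Sum the base RC6 residency with the deeper RC6p/RC6pp states on
 * platforms which have them.
 */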
static u64 __get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	u64 val;

	val = intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6pp);

	return val;
}

static inline s64 ktime_since_raw(const ktime_t kt)
{
	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}

static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
{
	return pmu->sample[gt_id][sample].cur;
}

static void
store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
{
	pmu->sample[gt_id][sample].cur = val;
}

static void
add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
{
	pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
}

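/*
 * Read the RC6 residency, or, while the GT is runtime suspended, estimate
 * it as the time elapsed since parking on top of the last known value.
 * Reported values are clamped to stay monotonic.
 */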
static u64 get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	const unsigned int gt_id = gt->info.id;
	struct i915_pmu *pmu = &i915->pmu;
	intel_wakeref_t wakeref;
	unsigned long flags;
	u64 val;

	wakeref = intel_gt_pm_get_if_awake(gt);
	if (wakeref) {
		val = __get_rc6(gt);
		intel_gt_pm_put_async(gt, wakeref);
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (wakeref) {
		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
	} else {
		/*
		 * We think we are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		val = ktime_since_raw(pmu->sleep_last[gt_id]);
		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
	}

	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
	else
		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}

static void init_rc6(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
			u64 val = __get_rc6(gt);

			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
				     val);
			pmu->sleep_last[i] = ktime_get_raw();
		}
	}
}

static void park_rc6(struct intel_gt *gt)
{
	struct i915_pmu *pmu = &gt->i915->pmu;

	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
	pmu->sleep_last[gt->info.id] = ktime_get_raw();
}

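/*
 * Start the sampling timer if any currently enabled event needs it and
 * it is not already running.
 */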
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
	if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
		pmu->timer_enabled = true;
		pmu->timer_last = ktime_get();
		hrtimer_start_range_ns(&pmu->timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_parked(struct intel_gt *gt)
{
	struct i915_pmu *pmu = &gt->i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	park_rc6(gt);

	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	pmu->unparked &= ~BIT(gt->info.id);
	if (pmu->unparked == 0)
		pmu->timer_enabled = false;

	spin_unlock_irq(&pmu->lock);
}

void i915_pmu_gt_unparked(struct intel_gt *gt)
{
	struct i915_pmu *pmu = &gt->i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	if (pmu->unparked == 0)
		__i915_pmu_maybe_start_timer(pmu);

	pmu->unparked |= BIT(gt->info.id);

	spin_unlock_irq(&pmu->lock);
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}

static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
	/*
	 * We have to avoid concurrent mmio cache line access on gen7 or
	 * risk a machine hang. For a fun history lesson dig out the old
	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
	 */
	return GRAPHICS_VER(i915) == 7;
}

static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
	struct intel_engine_pmu *pmu = &engine->pmu;
	bool busy;
	u32 val;

	val = ENGINE_READ_FW(engine, RING_CTL);
	if (val == 0) /* powerwell off => engine idle */
		return;

	if (val & RING_WAIT)
		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
	if (val & RING_WAIT_SEMAPHORE)
		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

	/* No need to sample when busy stats are supported. */
	if (intel_engine_supports_stats(engine))
		return;

	/*
	 * While waiting on a semaphore or event, MI_MODE reports the
	 * ring as idle. However, previously using the seqno, and with
	 * execlists sampling, we account for the ring waiting as the
	 * engine being busy. Therefore, we record the sample as being
	 * busy if either waiting or !idle.
	 */
	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
	if (!busy) {
		val = ENGINE_READ_FW(engine, RING_MI_MODE);
		busy = !(val & MODE_IDLE);
	}
	if (busy)
		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}

static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;

	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!intel_gt_pm_is_awake(gt))
		return;

	for_each_engine(engine, gt, id) {
		if (!engine->pmu.enable)
			continue;

		if (!intel_engine_pm_get_if_awake(engine))
			continue;

		if (exclusive_mmio_access(i915)) {
			spin_lock_irqsave(&engine->uncore->lock, flags);
			engine_sample(engine, period_ns);
			spin_unlock_irqrestore(&engine->uncore->lock, flags);
		} else {
			engine_sample(engine, period_ns);
		}

		intel_engine_pm_put_async(engine);
	}
}

static bool
frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
{
	return pmu->enable &
	       (config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt)) |
		config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt)));
}

static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	const unsigned int gt_id = gt->info.id;
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_rps *rps = &gt->rps;
	intel_wakeref_t wakeref;

	if (!frequency_sampling_enabled(pmu, gt_id))
		return;

	/* Report 0/0 (actual/requested) frequency while parked. */
	wakeref = intel_gt_pm_get_if_awake(gt);
	if (!wakeref)
		return;

	if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
		u32 val;

		/*
		 * We take a quick peek here without using forcewake
		 * so that we don't perturb the system under observation
		 * (forcewake => !rc6 => increased power use). We expect
		 * that if the read fails because it is outside of the
		 * mmio power well, then it will return 0 -- in which
		 * case we assume the system is running at the intended
		 * frequency. Fortunately, the read should rarely fail!
		 */
		val = intel_rps_read_actual_frequency_fw(rps);
		if (!val)
			val = intel_gpu_freq(rps, rps->cur_freq);

		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
				val, period_ns / 1000);
	}

	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
				intel_rps_get_requested_frequency(rps),
				period_ns / 1000);
	}

	intel_gt_pm_put_async(gt, wakeref);
}

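/*
 * Sampling timer callback: sample engine busyness and frequencies for all
 * unparked GTs, then re-arm the timer for the next PERIOD.
 */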
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct i915_pmu *pmu = container_of(hrtimer, struct i915_pmu, timer);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	unsigned int period_ns;
	struct intel_gt *gt;
	unsigned int i;
	ktime_t now;

	if (!READ_ONCE(pmu->timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
	pmu->timer_last = now;

	/*
	 * Strictly speaking the passed-in period may not be 100% accurate for
	 * all internal calculations, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer
	 * callback delay greatly dominates this so we keep it simple.
	 */

	for_each_gt(gt, i915, i) {
		if (!(pmu->unparked & BIT(i)))
			continue;

		engines_sample(gt, period_ns);
		frequency_sample(gt, period_ns);
	}

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);

	drm_WARN_ON(&i915->drm, event->parent);

	drm_dev_put(&i915->drm);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (GRAPHICS_VER(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

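/*
 * Validate a non-engine event config against this device, returning
 * -ENOENT for unknown or inapplicable events and -ENODEV for events the
 * hardware cannot support.
 */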
static int
config_status(struct drm_i915_private *i915, u64 config)
{
	struct intel_gt *gt = to_gt(i915);

	unsigned int gt_id = config_gt_id(config);
	unsigned int max_gt_id = HAS_EXTRA_GT_LIST(i915) ? 1 : 0;

	if (gt_id > max_gt_id)
		return -ENOENT;

	switch (config_counter(config)) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		fallthrough;
	case I915_PMU_REQUESTED_FREQUENCY:
		if (GRAPHICS_VER(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		if (gt_id)
			return -ENOENT;
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!gt->rc6.supported)
			return -ENODEV;
		break;
	case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	return engine_event_status(engine, engine_event_sample(event));
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	int ret;

	if (pmu->closed)
		return -ENODEV;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent) {
		drm_dev_get(&i915->drm);
		event->destroy = i915_pmu_event_destroy;
	}

	return 0;
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			ktime_t unused;

			val = ktime_to_ns(intel_engine_get_busy_time(engine,
								     &unused));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		const unsigned int gt_id = config_gt_id(event->attr.config);
		const u64 config = config_counter(event->attr.config);

		switch (config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(read_sample(pmu, gt_id,
					       __I915_SAMPLE_FREQ_ACT),
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(read_sample(pmu, gt_id,
					       __I915_SAMPLE_FREQ_REQ),
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = READ_ONCE(pmu->irq_count);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915->gt[gt_id]);
			break;
		case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
			val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
			break;
		}
	}

	return val;
}

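/*
 * Publish the delta since the previous read, using a cmpxchg loop so that
 * concurrent readers each account their own delta exactly once.
 */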
static void i915_pmu_event_read(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

	if (pmu->closed) {
		event->hw.state = PERF_HES_STOPPED;
		return;
	}

	prev = local64_read(&hwc->prev_count);
	do {
		new = __i915_pmu_event_read(event);
	} while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new));

	local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	const unsigned int bit = event_bit(event);
	unsigned long flags;

	if (bit == -1)
		goto update;

	spin_lock_irqsave(&pmu->lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == ~0);

	pmu->enable |= BIT(bit);
	pmu->enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&pmu->lock, flags);

update:
	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	const unsigned int bit = event_bit(event);
	unsigned long flags;

	if (bit == -1)
		return;

	spin_lock_irqsave(&pmu->lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--pmu->enable_count[bit] == 0) {
		pmu->enable &= ~BIT(bit);
		pmu->timer_enabled &= pmu_needs_timer(pmu);
	}

	spin_unlock_irqrestore(&pmu->lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	struct i915_pmu *pmu = event_to_pmu(event);

	if (pmu->closed)
		return;

	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		goto out;

	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);

out:
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	struct i915_pmu *pmu = event_to_pmu(event);

	if (pmu->closed)
		return -ENODEV;

	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static ssize_t cpumask_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR_RO(cpumask);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

#define __event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = false, \
}

#define __global_event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = true, \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

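/*
 * Build the sysfs "events" attribute group: one config attribute per
 * supported counter, plus a matching .unit attribute where applicable.
 */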
static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	static const struct {
		unsigned int counter;
		const char *name;
		const char *unit;
		bool global;
	} events[] = {
		__event(0, "actual-frequency", "M"),
		__event(1, "requested-frequency", "M"),
		__global_event(2, "interrupts", NULL),
		__event(3, "rc6-residency", "ns"),
		__event(4, "software-gt-awake-time", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	struct intel_gt *gt;
	unsigned int i, j;

	/* Count how many counters we will be exposing. */
	for_each_gt(gt, i915, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___I915_PMU_OTHER(j, events[i].counter);

			if (!config_status(i915, config))
				count++;
		}
	}

	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for_each_gt(gt, i915, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___I915_PMU_OTHER(j, events[i].counter);
			char *str;

			if (config_status(i915, config))
				continue;

			if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
				str = kstrdup(events[i].name, GFP_KERNEL);
			else
				str = kasprintf(GFP_KERNEL, "%s-gt%u",
						events[i].name, j);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter = add_i915_attr(i915_iter, str, config);

			if (events[i].unit) {
				if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
					str = kasprintf(GFP_KERNEL, "%s.unit",
							events[i].name);
				else
					str = kasprintf(GFP_KERNEL, "%s-gt%u.unit",
							events[i].name, j);
				if (!str)
					goto err;

				*attr_iter++ = &pmu_iter->attr.attr;
				pmu_iter = add_pmu_attr(pmu_iter, str,
							events[i].unit);
			}
		}
	}

	/* Initialize supported engine counters. */
	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->uabi_instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	pmu->i915_attr = i915_attr;
	pmu->pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct i915_pmu *pmu)
{
	struct attribute **attr_iter = pmu->events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(pmu->events_attr_group.attrs);
	kfree(pmu->i915_attr);
	kfree(pmu->pmu_attr);

	pmu->events_attr_group.attrs = NULL;
	pmu->i915_attr = NULL;
	pmu->pmu_attr = NULL;
}

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (cpumask_empty(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
	unsigned int target = i915_pmu_target_cpu;

	GEM_BUG_ON(!pmu->base.event_init);

	/*
	 * Unregistering an instance generates a CPU offline event which we must
	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
	 */
	if (pmu->closed)
		return 0;

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);

		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			i915_pmu_target_cpu = target;
		}
	}

	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
		perf_pmu_migrate_context(&pmu->base, cpu, target);
		pmu->cpuhp.cpu = target;
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

int i915_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
			  ret);
	else
		cpuhp_slot = ret;

	return 0;
}

void i915_pmu_exit(void)
{
	if (cpuhp_slot != CPUHP_INVALID)
		cpuhp_remove_multi_state(cpuhp_slot);
}

static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
	if (cpuhp_slot == CPUHP_INVALID)
		return -EINVAL;

	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static bool is_igp(struct drm_i915_private *i915)
{
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);

	/* IGP is 0000:00:02.0 */
	return pci_domain_nr(pdev->bus) == 0 &&
	       pdev->bus->number == 0 &&
	       PCI_SLOT(pdev->devfn) == 2 &&
	       PCI_FUNC(pdev->devfn) == 0;
}

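/*
 * Register the i915 PMU with perf. The integrated GPU at 0000:00:02.0
 * keeps the plain "i915" name; other devices are named "i915_<device>"
 * with colons replaced since tools/perf reserves them.
 */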
void i915_pmu_register(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;
	const struct attribute_group *attr_groups[] = {
		&i915_pmu_format_attr_group,
		&pmu->events_attr_group,
		&i915_pmu_cpumask_attr_group,
		NULL
	};

	int ret = -ENOMEM;

	if (GRAPHICS_VER(i915) <= 2) {
		drm_info(&i915->drm, "PMU not supported for this GPU.");
		return;
	}

	spin_lock_init(&pmu->lock);
	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	pmu->timer.function = i915_sample;
	pmu->cpuhp.cpu = -1;
	init_rc6(pmu);

	if (!is_igp(i915)) {
		pmu->name = kasprintf(GFP_KERNEL,
				      "i915_%s",
				      dev_name(i915->drm.dev));
		if (pmu->name) {
			/* tools/perf reserves colons as special. */
			strreplace((char *)pmu->name, ':', '_');
		}
	} else {
		pmu->name = "i915";
	}
	if (!pmu->name)
		goto err;

	pmu->events_attr_group.name = "events";
	pmu->events_attr_group.attrs = create_event_attributes(pmu);
	if (!pmu->events_attr_group.attrs)
		goto err_name;

	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
					GFP_KERNEL);
	if (!pmu->base.attr_groups)
		goto err_attr;

	pmu->base.module = THIS_MODULE;
	pmu->base.task_ctx_nr = perf_invalid_context;
	pmu->base.event_init = i915_pmu_event_init;
	pmu->base.add = i915_pmu_event_add;
	pmu->base.del = i915_pmu_event_del;
	pmu->base.start = i915_pmu_event_start;
	pmu->base.stop = i915_pmu_event_stop;
	pmu->base.read = i915_pmu_event_read;
	pmu->base.event_idx = i915_pmu_event_event_idx;

	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
	if (ret)
		goto err_groups;

	ret = i915_pmu_register_cpuhp_state(pmu);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&pmu->base);
err_groups:
	kfree(pmu->base.attr_groups);
err_attr:
	pmu->base.event_init = NULL;
	free_event_attributes(pmu);
err_name:
	if (!is_igp(i915))
		kfree(pmu->name);
err:
	drm_notice(&i915->drm, "Failed to register PMU!\n");
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	/*
	 * "Disconnect" the PMU callbacks - since all are atomic, synchronize_rcu()
	 * ensures all currently executing ones will have exited before we
	 * proceed with unregistration.
	 */
	pmu->closed = true;
	synchronize_rcu();

	hrtimer_cancel(&pmu->timer);

	i915_pmu_unregister_cpuhp_state(pmu);

	perf_pmu_unregister(&pmu->base);
	pmu->base.event_init = NULL;
	kfree(pmu->base.attr_groups);
	if (!is_igp(i915))
		kfree(pmu->name);
	free_event_attributes(pmu);
}