// SPDX-License-Identifier: GPL-2.0
/*
 * Implement support for AMD Fam19h Branch Sampling feature
 * Based on specifications published in AMD PPR Fam19 Model 01
 *
 * Copyright 2021 Google LLC
 * Contributed by Stephane Eranian <eranian@google.com>
 */
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>

#include "../perf_event.h"

#define BRS_POISON	0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */

/* Debug Extension Configuration register layout */
union amd_debug_extn_cfg {
	__u64 val;
	struct {
		__u64	rsvd0:2,  /* reserved */
			brsmen:1, /* branch sample enable */
			rsvd4_3:2, /* reserved - must be 0x3 */
			vb:1,	  /* valid branches recorded */
			rsvd2:10, /* reserved */
			msroff:4, /* index of next entry to write */
			rsvd3:4,  /* reserved */
			pmc:3,	  /* #PMC holding the sampling event */
			rsvd4:37; /* reserved */
	};
};
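
/*
 * Example (illustrative, not taken from the PPR): a raw register value
 * of 0x5003c decodes as brsmen=1, rsvd4_3=0x3, vb=1, msroff=5, i.e.,
 * branch sampling is enabled, valid branches have been recorded, and
 * entry 5 is the next one to be written.
 */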

static inline unsigned int brs_from(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx;
}

static inline unsigned int brs_to(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}
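
/*
 * The FROM/TO MSRs are interleaved: entry idx uses the pair
 * (MSR_AMD_SAMP_BR_FROM + 2*idx, MSR_AMD_SAMP_BR_FROM + 2*idx + 1).
 * Illustrative example, assuming the documented base of 0xc0010300:
 * entry 3 lives in MSRs 0xc0010306 (from) and 0xc0010307 (to).
 */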

static __always_inline void set_debug_extn_cfg(u64 val)
{
	/* bits[4:3] must always be set to 11b */
	__wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32);
}

static __always_inline u64 get_debug_extn_cfg(void)
{
	return __rdmsr(MSR_AMD_DBG_EXTN_CFG);
}
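
/*
 * Note: the raw __rdmsr()/__wrmsr() accessors (no tracing, no
 * exception fixup) are used because these helpers are inlined into
 * the noinstr perf_amd_brs_lopwr_cb() below.
 */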

static bool __init amd_brs_detect(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return false;

	switch (boot_cpu_data.x86) {
	case 0x19: /* AMD Fam19h (Zen3) */
		x86_pmu.lbr_nr = 16;

		/* No hardware filtering supported */
		x86_pmu.lbr_sel_map = NULL;
		x86_pmu.lbr_sel_mask = 0;
		break;
	default:
		return false;
	}

	return true;
}

/*
 * The current BRS implementation supports no branch type or privilege
 * level filtering, so this function simply enforces those limitations;
 * no br_sel_map is needed. Software filtering is not supported either,
 * because it would not correlate well with the sampling period.
 */
static int amd_brs_setup_filter(struct perf_event *event)
{
	u64 type = event->attr.branch_sample_type;

	/* No BRS support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/* Can only capture all branches, i.e., no filtering */
	if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
		return -EINVAL;

	return 0;
}
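
/*
 * Examples (illustrative) of how the check above behaves:
 *   PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER -> accepted,
 *     privilege level bits are masked off before the comparison;
 *   PERF_SAMPLE_BRANCH_ANY_CALL -> -EINVAL, BRS cannot filter by
 *     branch type.
 */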

static inline int amd_is_brs_event(struct perf_event *e)
{
	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
}

int amd_brs_hw_config(struct perf_event *event)
{
	int ret = 0;

	/*
	 * BRS holds off the PMU interrupt until its buffer is full, so
	 * it is not recommended in counting mode.
	 */
	if (!is_sampling_event(event))
		return -EINVAL;

	/*
	 * Due to the way BRS operates by holding the interrupt until
	 * lbr_nr entries have been captured, it does not make sense
	 * to allow sampling on BRS with an event that does not match
	 * what BRS is capturing, i.e., retired taken branches.
	 * Otherwise the correlation with the event's period becomes
	 * even looser:
	 *
	 * With retired taken branch:
	 *	Effective P = P + 16 + X
	 * With any other event:
	 *	Effective P = P + Y + X
	 *
	 * Where X is the number of taken branches due to interrupt
	 * skid. Skid is large.
	 *
	 * Where Y is the occurrences of the event while BRS is
	 * capturing the lbr_nr entries.
	 *
	 * By using retired taken branches, we limit the impact on the
	 * Y variable. We know it cannot be more than the depth of
	 * BRS.
	 */
	if (!amd_is_brs_event(event))
		return -EINVAL;

	/*
	 * BRS implementation does not work with frequency mode
	 * reprogramming of the period.
	 */
	if (event->attr.freq)
		return -EINVAL;

	/*
	 * The kernel subtracts the BRS depth from the period, so the
	 * period must be larger than the depth.
	 */
	if (event->attr.sample_period <= x86_pmu.lbr_nr)
		return -EINVAL;

	/*
	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
	 */
	ret = amd_brs_setup_filter(event);

	/* only set in case of success */
	if (!ret)
		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;

	return ret;
}
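
/*
 * Example (illustrative): a perf_event_attr that would pass the checks
 * above, assuming a 16-deep BRS (AMD_FAM19H_BRS_EVENT encodes retired
 * taken branches):
 *
 *	struct perf_event_attr attr = {
 *		.type			= PERF_TYPE_RAW,
 *		.config			= AMD_FAM19H_BRS_EVENT,
 *		.sample_period		= 100003,  // fixed period > x86_pmu.lbr_nr, .freq left at 0
 *		.sample_type		= PERF_SAMPLE_BRANCH_STACK,
 *		.branch_sample_type	= PERF_SAMPLE_BRANCH_ANY,
 *	};
 */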

/* tos = top of stack, i.e., last valid entry written */
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
{
	/*
	 * msroff is the index of the next entry to write, so the
	 * top-of-stack is one off; when BRS is full, msroff wraps
	 * back to 0.
	 */
	return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
}
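
/*
 * Examples (illustrative), with a 16-deep BRS:
 *   msroff = 5 -> tos = 4 (entry 4 was written last)
 *   msroff = 0 -> tos = 15 (buffer full, msroff wrapped back to 0)
 */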

/*
 * make sure we have a sane BRS offset to begin with
 * especially with kexec
 */
void amd_brs_reset(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return;

	/*
	 * Reset config
	 */
	set_debug_extn_cfg(0);

	/*
	 * Mark first entry as poisoned
	 */
	wrmsrl(brs_to(0), BRS_POISON);
}

int __init amd_brs_init(void)
{
	if (!amd_brs_detect())
		return -EOPNOTSUPP;

	pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);

	return 0;
}

void amd_brs_enable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Activate only on first user */
	if (++cpuc->brs_active > 1)
		return;

	cfg.val    = 0; /* reset all fields */
	cfg.brsmen = 1; /* enable branch sampling */

	/* Set enable bit */
	set_debug_extn_cfg(cfg.val);
}

void amd_brs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_enable();
}

void amd_brs_disable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Check if active (could be disabled via x86_pmu_disable_all()) */
	if (!cpuc->brs_active)
		return;

	/* Only disable for last user */
	if (--cpuc->brs_active)
		return;

	/*
	 * Clear the brsmen bit but preserve the others as they contain
	 * useful state such as vb and msroff
	 */
	cfg.val = get_debug_extn_cfg();

	/*
	 * When coming in on interrupt with BRS full, the hardware will
	 * already have stopped BRS, so there is no need to issue the
	 * wrmsr again.
	 */
	if (cfg.brsmen) {
		cfg.brsmen = 0;
		set_debug_extn_cfg(cfg.val);
	}
}

void amd_brs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_disable();
}

static bool amd_brs_match_plm(struct perf_event *event, u64 to)
{
	int type = event->attr.branch_sample_type;
	int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
	int plm_u = PERF_SAMPLE_BRANCH_USER;

	if (!(type & plm_k) && kernel_ip(to))
		return false;

	if (!(type & plm_u) && !kernel_ip(to))
		return false;

	return true;
}
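
/*
 * Example (illustrative): with branch_sample_type =
 * PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER, an entry whose
 * target is a kernel address is dropped, because neither
 * PERF_SAMPLE_BRANCH_KERNEL nor PERF_SAMPLE_BRANCH_HV is set.
 */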

/*
 * Caller must ensure amd_brs_inuse() is true before calling.
 * Drains the BRS entries into cpuc->lbr_entries.
 */
void amd_brs_drain(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event = cpuc->events[0];
	struct perf_branch_entry *br = cpuc->lbr_entries;
	union amd_debug_extn_cfg cfg;
	u32 i, nr = 0, num, tos, start;
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	/*
	 * The BRS event is forced on PMC0, so check if there is an
	 * event. It is possible to have lbr_users > 0 but the event
	 * not yet scheduled due to a long-latency PMU irq.
	 */
	if (!event)
		goto empty;

	cfg.val = get_debug_extn_cfg();

	/* Sanity check: msroff must be in [0, x86_pmu.lbr_nr) */
	if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
		goto empty;

	/* No valid branch */
	if (cfg.vb == 0)
		goto empty;

	/*
	 * msroff points to the next entry to be written, so
	 * tos = most recent entry index = msroff - 1.
	 * The BRS register buffer saturates, so we know we have
	 * start <= tos and that we have to read from start to tos.
	 */
	start = 0;
	tos = amd_brs_get_tos(&cfg);

	num = tos - start + 1;

	/*
	 * BRS makes only one pass (saturation) from MSROFF to depth-1;
	 * MSROFF wraps back to zero when the buffer is full.
	 */
	for (i = 0; i < num; i++) {
		u32 brs_idx = tos - i;
		u64 from, to;

		rdmsrl(brs_to(brs_idx), to);

		/* Entry does not belong to us (as marked by kernel) */
		if (to == BRS_POISON)
			break;

		/*
		 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
		 * Necessary to generate proper virtual addresses suitable for
		 * symbolization
		 */
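		/*
		 * Example (illustrative): with 48 virtual address bits,
		 * shift = 16 and a raw TO value of 0x0000ffffffff1234
		 * sign-extends to the canonical kernel address
		 * 0xffffffffffff1234.
		 */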
		to = (u64)(((s64)to << shift) >> shift);

		if (!amd_brs_match_plm(event, to))
			continue;

		rdmsrl(brs_from(brs_idx), from);

		perf_clear_branch_entry_bitfields(br + nr);

		br[nr].from = from;
		br[nr].to   = to;

		nr++;
	}
empty:
	/* Record number of sampled branches */
	cpuc->lbr_stack.nr = nr;
}

/*
 * Poison the most recent entry to prevent reuse by the next task,
 * required because BRS entries are not tagged by PID.
 */
static void amd_brs_poison_buffer(void)
{
	union amd_debug_extn_cfg cfg;
	unsigned int idx;

	/* Get current state */
	cfg.val = get_debug_extn_cfg();

	/* idx is most recently written entry */
	idx = amd_brs_get_tos(&cfg);

	/* Poison target of entry */
	wrmsrl(brs_to(idx), BRS_POISON);
}

/*
 * On context switch in, we need to make sure no samples from the
 * previous user are left in the BRS.
 *
 * On ctxswin, sched_in = true, called after the PMU has started.
 * On ctxswout, sched_in = false, called before the PMU is stopped.
 */
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* no active users */
	if (!cpuc->lbr_users)
		return;

	/*
	 * On context switch in, we need to ensure we do not use entries
	 * from the previous BRS user on that CPU, so we poison the buffer
	 * as a faster alternative to resetting all entries.
	 */
	if (sched_in)
		amd_brs_poison_buffer();
}

/*
 * called from ACPI processor_idle.c or acpi_pad.c
 * with interrupts disabled
 */
void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/*
	 * On mwait in, we may end up in a non-C0 state. We must disable
	 * branch sampling to avoid holding the NMI pending for too long.
	 * We disable it in hardware, but keep the state in cpuc so we
	 * can re-enable it on the way out.
	 *
	 * The hardware will deliver the NMI, if needed, once brsmen is
	 * cleared.
	 */
	if (cpuc->brs_active) {
		cfg.val = get_debug_extn_cfg();
		cfg.brsmen = !lopwr_in;
		set_debug_extn_cfg(cfg.val);
	}
}

DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);

void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}
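
/*
 * Example (illustrative) of how the static call is expected to be
 * invoked from the low-power entry/exit paths:
 *
 *	static_call(perf_lopwr_cb)(true);	// entering low power
 *	...
 *	static_call(perf_lopwr_cb)(false);	// leaving low power
 */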