// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK 0x1ff

/*
 * LBR Branch Select filter bits which, when set, ensure that the
 * corresponding type of branches is not recorded
 */
#define LBR_SELECT_KERNEL 0 /* Branches ending in CPL = 0 */
#define LBR_SELECT_USER 1 /* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC 2 /* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL 3 /* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND 4 /* Indirect relative calls */
#define LBR_SELECT_RET_NEAR 5 /* Near returns */
#define LBR_SELECT_JMP_NEAR_IND 6 /* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL 7 /* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH 8 /* Far branches */

#define LBR_KERNEL BIT(LBR_SELECT_KERNEL)
#define LBR_USER BIT(LBR_SELECT_USER)
#define LBR_JCC BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP -1 /* unsupported filter */
#define LBR_IGNORE 0

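/*
 * Note that LBR_ANY covers only the branch-type filter bits; the
 * privilege-level bits (LBR_KERNEL, LBR_USER) are handled separately
 */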
#define LBR_ANY \
        (LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN | \
         LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)

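/*
 * In-memory view of a single LBR record as read from the From/To MSR
 * pair: "from" carries the branch source with the mispredict status in
 * bit 63, and "to" carries the branch target with the valid, spec and
 * reserved status bits in bits 63-61. The IP fields are truncated to
 * 58 bits and must be sign-extended back to a canonical virtual
 * address (see sign_ext_branch_ip() below).
 */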
struct branch_entry {
        union {
                struct {
                        u64 ip:58;
                        u64 ip_sign_ext:5;
                        u64 mispredict:1;
                } split;
                u64 full;
        } from;

        union {
                struct {
                        u64 ip:58;
                        u64 ip_sign_ext:3;
                        u64 reserved:1;
                        u64 spec:1;
                        u64 valid:1;
                } split;
                u64 full;
        } to;
};

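/*
 * The From and To registers of each LBR entry are interleaved in the
 * MSR space: entry N's From register lives at MSR_AMD_SAMP_BR_FROM +
 * 2N and its To register at MSR_AMD_SAMP_BR_FROM + 2N + 1
 */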
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
        wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}

static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
        wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}

static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
        u64 val;

        rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);

        return val;
}

static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
        u64 val;

        rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);

        return val;
}

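/*
 * Restore a canonical virtual address from the truncated IP held in a
 * branch record by sign-extending from the CPU's virtual address
 * width. For example (illustrative), with 48-bit virtual addresses the
 * shift is 16, so a recorded kernel IP regains its upper 0xffff bits.
 */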
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
        u32 shift = 64 - boot_cpu_data.x86_virt_bits;

        return (u64)(((s64)ip << shift) >> shift);
}

static void amd_pmu_lbr_filter(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int br_sel = cpuc->br_sel, offset, type, i, j;
        bool compress = false;
        bool fused_only = false;
        u64 from, to;

        /*
         * If sampling all branch types without saving the branch type,
         * no type-based filtering is needed; only the from addresses
         * of fused branches must still be adjusted below
         */
        if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
            ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
                fused_only = true;

        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
                from = cpuc->lbr_entries[i].from;
                to = cpuc->lbr_entries[i].to;
                type = branch_type_fused(from, to, 0, &offset);

                /*
                 * Adjust the branch from address in case of instruction
                 * fusion where it points to an instruction preceding the
                 * actual branch
                 */
                if (offset) {
                        cpuc->lbr_entries[i].from += offset;
                        if (fused_only)
                                continue;
                }

                /* Discard the entry if its branch type was not requested */
                if (type == X86_BR_NONE || (br_sel & type) != type) {
                        cpuc->lbr_entries[i].from = 0; /* mark invalid */
                        compress = true;
                }

                if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
                        cpuc->lbr_entries[i].type = common_branch_type(type);
        }

        if (!compress)
                return;

        /*
         * Remove all invalid entries by shifting the remaining ones up;
         * recheck the current slot after each removal since the entry
         * copied into it may itself be invalid
         */
        for (i = 0; i < cpuc->lbr_stack.nr; ) {
                if (!cpuc->lbr_entries[i].from) {
                        j = i;
                        while (++j < cpuc->lbr_stack.nr)
                                cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
                        cpuc->lbr_stack.nr--;
                        if (!cpuc->lbr_entries[i].from)
                                continue;
                }
                i++;
        }
}

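/*
 * Branch speculation outcomes, indexed by (valid << 1) | spec taken
 * from a record's To register; see amd_pmu_lbr_read() below
 */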
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
        PERF_BR_SPEC_NA,
        PERF_BR_SPEC_WRONG_PATH,
        PERF_BR_NON_SPEC_CORRECT_PATH,
        PERF_BR_SPEC_CORRECT_PATH,
};

void amd_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_branch_entry *br = cpuc->lbr_entries;
        struct branch_entry entry;
        int out = 0, idx, i;

        if (!cpuc->lbr_users)
                return;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                entry.from.full = amd_pmu_lbr_get_from(i);
                entry.to.full = amd_pmu_lbr_get_to(i);

                /*
                 * Check if a branch has been logged; if valid = 0, spec = 0
                 * then no branch was recorded; if reserved = 1 then an
                 * erroneous branch was recorded (see Erratum 1452)
                 */
                if ((!entry.to.split.valid && !entry.to.split.spec) ||
                    entry.to.split.reserved)
                        continue;

                perf_clear_branch_entry_bitfields(br + out);

                br[out].from = sign_ext_branch_ip(entry.from.split.ip);
                br[out].to = sign_ext_branch_ip(entry.to.split.ip);
                br[out].mispred = entry.from.split.mispredict;
                br[out].predicted = !br[out].mispred;

                /*
                 * Set branch speculation information using the status of
                 * the valid and spec bits.
                 *
                 * When valid = 0, spec = 0, no branch was recorded and the
                 * entry is discarded as seen above.
                 *
                 * When valid = 0, spec = 1, the recorded branch was
                 * speculative but took the wrong path.
                 *
                 * When valid = 1, spec = 0, the recorded branch was
                 * non-speculative but took the correct path.
                 *
                 * When valid = 1, spec = 1, the recorded branch was
                 * speculative and took the correct path.
                 */
                idx = (entry.to.split.valid << 1) | entry.to.split.spec;
                br[out].spec = lbr_spec_map[idx];
                out++;
        }

        cpuc->lbr_stack.nr = out;

        /*
         * Internal register renaming ensures that LBR From[0] and
         * LBR To[0] always represent the TOS, i.e. the most recent
         * branch record
         */
        cpuc->lbr_stack.hw_idx = 0;

        /* Perform further software filtering */
        amd_pmu_lbr_filter();
}

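/*
 * Map of perf branch sample type shifts to LBR Branch Select bits;
 * branch types marked LBR_NOT_SUPP cause event creation to fail with
 * -EOPNOTSUPP
 */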
static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGNORE,

        [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
        [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IN_TX_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_TX_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,

        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,

        [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT] = LBR_NOT_SUPP,
};

static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;
        u64 br_type = event->attr.branch_sample_type;
        u64 mask = 0, v;
        int i;

        /* No LBR support */
        if (!x86_pmu.lbr_nr)
                return -EOPNOTSUPP;

        if (br_type & PERF_SAMPLE_BRANCH_USER)
                mask |= X86_BR_USER;

        if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
                mask |= X86_BR_KERNEL;

        /* Ignore BRANCH_HV here */

        if (br_type & PERF_SAMPLE_BRANCH_ANY)
                mask |= X86_BR_ANY;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
                mask |= X86_BR_ANY_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
                mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

        if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
                mask |= X86_BR_IND_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_COND)
                mask |= X86_BR_JCC;

        if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
                mask |= X86_BR_IND_JMP;

        if (br_type & PERF_SAMPLE_BRANCH_CALL)
                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
                mask |= X86_BR_TYPE_SAVE;

        reg->reg = mask;
        mask = 0;

        for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
                if (!(br_type & BIT_ULL(i)))
                        continue;

                v = lbr_select_map[i];
                if (v == LBR_NOT_SUPP)
                        return -EOPNOTSUPP;

                if (v != LBR_IGNORE)
                        mask |= v;
        }

        /*
         * The hardware filter bits operate in suppress mode: a set bit
         * prevents branches of that type from being recorded, so invert
         * the allow-mask accumulated above. Any branch type that was
         * not requested ends up with its select bit set and is thereby
         * filtered out by hardware.
         */
        reg->config = mask ^ LBR_SELECT_MASK;

        return 0;
}

int amd_pmu_lbr_hw_config(struct perf_event *event)
{
        int ret = 0;

        /* LBR is not recommended in counting mode */
        if (!is_sampling_event(event))
                return -EINVAL;

        ret = amd_pmu_lbr_setup_filter(event);
        if (!ret)
                event->attach_state |= PERF_ATTACH_SCHED_CB;

        return ret;
}

void amd_pmu_lbr_reset(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int i;

        if (!x86_pmu.lbr_nr)
                return;

        /* Reset all branch records individually */
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                amd_pmu_lbr_set_from(i, 0);
                amd_pmu_lbr_set_to(i, 0);
        }

        cpuc->last_task_ctx = NULL;
        cpuc->last_log_id = 0;
        wrmsrl(MSR_AMD64_LBR_SELECT, 0);
}

void amd_pmu_lbr_add(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event)) {
                cpuc->lbr_select = 1;
                cpuc->lbr_sel->config = reg->config;
                cpuc->br_sel = reg->reg;
        }

        perf_sched_cb_inc(event->pmu);

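        /*
         * Wipe stale records when the first user is added, unless the
         * event has already been running and is merely being
         * rescheduled onto this CPU
         */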
        if (!cpuc->lbr_users++ && !event->total_time_running)
                amd_pmu_lbr_reset();
}

void amd_pmu_lbr_del(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event))
                cpuc->lbr_select = 0;

        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
        perf_sched_cb_dec(event->pmu);
}

void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /*
         * A context switch can flip the address space and LBR entries are
         * not tagged with an identifier. Hence, branches cannot be resolved
         * from the old address space and the LBR records should be wiped.
         */
        if (cpuc->lbr_users && sched_in)
                amd_pmu_lbr_reset();
}

void amd_pmu_lbr_enable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 lbr_select, dbg_ctl, dbg_extn_cfg;

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

        /* Set hardware branch filter */
        if (cpuc->lbr_select) {
                lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
                wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
        }

        if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
                rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
                wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        }

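        /*
         * Enable branch record collection only after the filter and
         * PMI freeze behaviour have been programmed
         */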
        rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
        wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

void amd_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 dbg_ctl, dbg_extn_cfg;

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

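        /*
         * Stop branch record collection before clearing the PMI
         * freeze behaviour
         */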
        rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
        wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);

        if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
                rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
                wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        }
}

__init int amd_pmu_lbr_init(void)
{
        union cpuid_0x80000022_ebx ebx;

        if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
                return -EOPNOTSUPP;

        /* Read the number of LBR stack entries from CPUID leaf 0x80000022 EBX */
        ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
        x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;

        pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

        return 0;
}
446 | |