1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * AMD specific. Provide textual annotation for IBS raw sample data. |
4 | */ |
5 | |
6 | #include <unistd.h> |
7 | #include <stdio.h> |
8 | #include <string.h> |
9 | #include <inttypes.h> |
10 | |
11 | #include <linux/string.h> |
12 | #include "../../arch/x86/include/asm/amd-ibs.h" |
13 | |
14 | #include "debug.h" |
15 | #include "session.h" |
16 | #include "evlist.h" |
17 | #include "sample-raw.h" |
18 | #include "util/sample.h" |
19 | |
20 | static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; |
21 | static bool zen4_ibs_extensions; |
22 | |
23 | static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) |
24 | { |
25 | const char * const ic_miss_strs[] = { |
26 | " IcMiss 0" , |
27 | " IcMiss 1" , |
28 | }; |
29 | const char * const l1tlb_pgsz_strs[] = { |
30 | " L1TlbPgSz 4KB" , |
31 | " L1TlbPgSz 2MB" , |
32 | " L1TlbPgSz 1GB" , |
33 | " L1TlbPgSz RESERVED" |
34 | }; |
35 | const char * const l1tlb_pgsz_strs_erratum1347[] = { |
36 | " L1TlbPgSz 4KB" , |
37 | " L1TlbPgSz 16KB" , |
38 | " L1TlbPgSz 2MB" , |
39 | " L1TlbPgSz 1GB" |
40 | }; |
41 | const char *ic_miss_str = NULL; |
42 | const char *l1tlb_pgsz_str = NULL; |
43 | char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _" )] = "" ; |
44 | |
45 | if (cpu_family == 0x19 && cpu_model < 0x10) { |
46 | /* |
47 | * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss] |
48 | * Erratum #1347 workaround is to use table provided in erratum |
49 | */ |
50 | if (reg.phy_addr_valid) |
51 | l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz]; |
52 | } else { |
53 | if (reg.phy_addr_valid) |
54 | l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz]; |
55 | ic_miss_str = ic_miss_strs[reg.ic_miss]; |
56 | } |
57 | |
58 | if (zen4_ibs_extensions) { |
59 | snprintf(buf: l3_miss_str, size: sizeof(l3_miss_str), |
60 | fmt: " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d" , |
61 | reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss); |
62 | } |
63 | |
64 | printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " |
65 | "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n" , |
66 | reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, |
67 | reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "" , |
68 | reg.phy_addr_valid, l1tlb_pgsz_str ? : "" , reg.l1tlb_miss, reg.l2tlb_miss, |
69 | reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0" ) : "" , |
70 | l3_miss_str); |
71 | } |
72 | |
73 | static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) |
74 | { |
75 | printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n" , reg.val, reg.itlb_refill_lat); |
76 | } |
77 | |
78 | static void pr_ibs_op_ctl(union ibs_op_ctl reg) |
79 | { |
80 | char l3_miss_only[sizeof(" L3MissOnly _" )] = "" ; |
81 | |
82 | if (zen4_ibs_extensions) |
83 | snprintf(buf: l3_miss_only, size: sizeof(l3_miss_only), fmt: " L3MissOnly %d" , reg.l3_miss_only); |
84 | |
85 | printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n" , |
86 | reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only, |
87 | reg.op_en, reg.op_val, reg.cnt_ctl, |
88 | reg.cnt_ctl ? "uOps" : "cycles" , reg.opcurcnt); |
89 | } |
90 | |
91 | static void pr_ibs_op_data(union ibs_op_data reg) |
92 | { |
93 | printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d " |
94 | " RipInvalid %d BrnFuse %d Microcode %d\n" , |
95 | reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr, |
96 | reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0" ) : "" , |
97 | reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0" ) : "" , |
98 | reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0" ) : "" , |
99 | reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); |
100 | } |
101 | |
102 | static void pr_ibs_op_data2_extended(union ibs_op_data2 reg) |
103 | { |
104 | static const char * const data_src_str[] = { |
105 | "" , |
106 | " DataSrc 1=Local L3 or other L1/L2 in CCX" , |
107 | " DataSrc 2=Another CCX cache in the same NUMA node" , |
108 | " DataSrc 3=DRAM" , |
109 | " DataSrc 4=(reserved)" , |
110 | " DataSrc 5=Another CCX cache in a different NUMA node" , |
111 | " DataSrc 6=Long-latency DIMM" , |
112 | " DataSrc 7=MMIO/Config/PCI/APIC" , |
113 | " DataSrc 8=Extension Memory" , |
114 | " DataSrc 9=(reserved)" , |
115 | " DataSrc 10=(reserved)" , |
116 | " DataSrc 11=(reserved)" , |
117 | " DataSrc 12=Coherent Memory of a different processor type" , |
118 | /* 13 to 31 are reserved. Avoid printing them. */ |
119 | }; |
120 | int data_src = (reg.data_src_hi << 3) | reg.data_src_lo; |
121 | |
122 | printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n" , reg.val, |
123 | (data_src == 1 || data_src == 2 || data_src == 5) ? |
124 | (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state " ) : "" , |
125 | reg.rmt_node, |
126 | data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : "" ); |
127 | } |
128 | |
129 | static void pr_ibs_op_data2_default(union ibs_op_data2 reg) |
130 | { |
131 | static const char * const data_src_str[] = { |
132 | "" , |
133 | " DataSrc 1=(reserved)" , |
134 | " DataSrc 2=Local node cache" , |
135 | " DataSrc 3=DRAM" , |
136 | " DataSrc 4=Remote node cache" , |
137 | " DataSrc 5=(reserved)" , |
138 | " DataSrc 6=(reserved)" , |
139 | " DataSrc 7=Other" |
140 | }; |
141 | |
142 | printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n" , reg.val, |
143 | reg.data_src_lo == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State " |
144 | : "CacheHitSt 0=M-state " ) : "" , |
145 | reg.rmt_node, data_src_str[reg.data_src_lo]); |
146 | } |
147 | |
148 | static void pr_ibs_op_data2(union ibs_op_data2 reg) |
149 | { |
150 | if (zen4_ibs_extensions) |
151 | return pr_ibs_op_data2_extended(reg); |
152 | pr_ibs_op_data2_default(reg); |
153 | } |
154 | |
155 | static void pr_ibs_op_data3(union ibs_op_data3 reg) |
156 | { |
157 | char l2_miss_str[sizeof(" L2Miss _" )] = "" ; |
158 | char op_mem_width_str[sizeof(" OpMemWidth _____ bytes" )] = "" ; |
159 | char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __" )] = "" ; |
160 | |
161 | /* |
162 | * Erratum #1293 |
163 | * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set |
164 | */ |
165 | if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) { |
166 | snprintf(buf: l2_miss_str, size: sizeof(l2_miss_str), fmt: " L2Miss %d" , reg.l2_miss); |
167 | snprintf(buf: op_dc_miss_open_mem_reqs_str, size: sizeof(op_dc_miss_open_mem_reqs_str), |
168 | fmt: " OpDcMissOpenMemReqs %2d" , reg.op_dc_miss_open_mem_reqs); |
169 | } |
170 | |
171 | if (reg.op_mem_width) |
172 | snprintf(buf: op_mem_width_str, size: sizeof(op_mem_width_str), |
173 | fmt: " OpMemWidth %2d bytes" , 1 << (reg.op_mem_width - 1)); |
174 | |
175 | printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d " |
176 | "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d " |
177 | "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d " |
178 | "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n" , |
179 | reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss, |
180 | reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss, |
181 | reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op, |
182 | reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, |
183 | reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str, |
184 | op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat); |
185 | } |
186 | |
187 | /* |
188 | * IBS Op/Execution MSRs always saved, in order, are: |
189 | * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2, |
190 | * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP |
191 | */ |
192 | static void amd_dump_ibs_op(struct perf_sample *sample) |
193 | { |
194 | struct perf_ibs_data *data = sample->raw_data; |
195 | union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; |
196 | __u64 *rip = (__u64 *)op_ctl + 1; |
197 | union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1); |
198 | union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3); |
199 | |
200 | pr_ibs_op_ctl(reg: *op_ctl); |
201 | if (!op_data->op_rip_invalid) |
202 | printf("IbsOpRip:\t%016llx\n" , *rip); |
203 | pr_ibs_op_data(reg: *op_data); |
204 | /* |
205 | * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set |
206 | */ |
207 | if (!(cpu_family == 0x19 && cpu_model < 0x10 && |
208 | (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf))) |
209 | pr_ibs_op_data2(reg: *(union ibs_op_data2 *)(rip + 2)); |
210 | pr_ibs_op_data3(reg: *op_data3); |
211 | if (op_data3->dc_lin_addr_valid) |
212 | printf("IbsDCLinAd:\t%016llx\n" , *(rip + 4)); |
213 | if (op_data3->dc_phy_addr_valid) |
214 | printf("IbsDCPhysAd:\t%016llx\n" , *(rip + 5)); |
215 | if (op_data->op_brn_ret && *(rip + 6)) |
216 | printf("IbsBrTarget:\t%016llx\n" , *(rip + 6)); |
217 | } |
218 | |
219 | /* |
220 | * IBS Fetch MSRs always saved, in order, are: |
221 | * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL |
222 | */ |
223 | static void amd_dump_ibs_fetch(struct perf_sample *sample) |
224 | { |
225 | struct perf_ibs_data *data = sample->raw_data; |
226 | union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; |
227 | __u64 *addr = (__u64 *)fetch_ctl + 1; |
228 | union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2; |
229 | |
230 | pr_ibs_fetch_ctl(reg: *fetch_ctl); |
231 | printf("IbsFetchLinAd:\t%016llx\n" , *addr++); |
232 | if (fetch_ctl->phy_addr_valid) |
233 | printf("IbsFetchPhysAd:\t%016llx\n" , *addr); |
234 | pr_ic_ibs_extd_ctl(reg: *extd_ctl); |
235 | } |
236 | |
237 | /* |
238 | * Test for enable and valid bits in captured control MSRs. |
239 | */ |
240 | static bool is_valid_ibs_fetch_sample(struct perf_sample *sample) |
241 | { |
242 | struct perf_ibs_data *data = sample->raw_data; |
243 | union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; |
244 | |
245 | if (fetch_ctl->fetch_en && fetch_ctl->fetch_val) |
246 | return true; |
247 | |
248 | return false; |
249 | } |
250 | |
251 | static bool is_valid_ibs_op_sample(struct perf_sample *sample) |
252 | { |
253 | struct perf_ibs_data *data = sample->raw_data; |
254 | union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; |
255 | |
256 | if (op_ctl->op_en && op_ctl->op_val) |
257 | return true; |
258 | |
259 | return false; |
260 | } |
261 | |
262 | /* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events |
263 | * and if the event was triggered by IBS, display its raw data with decoded text. |
264 | * The function is only invoked when the dump flag -D is set. |
265 | */ |
266 | void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, |
267 | struct perf_sample *sample) |
268 | { |
269 | struct evsel *evsel; |
270 | |
271 | if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size) |
272 | return; |
273 | |
274 | evsel = evlist__event2evsel(evlist, event); |
275 | if (!evsel) |
276 | return; |
277 | |
278 | if (evsel->core.attr.type == ibs_fetch_type) { |
279 | if (!is_valid_ibs_fetch_sample(sample)) { |
280 | pr_debug("Invalid raw IBS Fetch MSR data encountered\n" ); |
281 | return; |
282 | } |
283 | amd_dump_ibs_fetch(sample); |
284 | } else if (evsel->core.attr.type == ibs_op_type) { |
285 | if (!is_valid_ibs_op_sample(sample)) { |
286 | pr_debug("Invalid raw IBS Op MSR data encountered\n" ); |
287 | return; |
288 | } |
289 | amd_dump_ibs_op(sample); |
290 | } |
291 | } |
292 | |
293 | static void parse_cpuid(struct perf_env *env) |
294 | { |
295 | const char *cpuid; |
296 | int ret; |
297 | |
298 | cpuid = perf_env__cpuid(env); |
299 | /* |
300 | * cpuid = "AuthenticAMD,family,model,stepping" |
301 | */ |
302 | ret = sscanf(cpuid, "%*[^,],%u,%u" , &cpu_family, &cpu_model); |
303 | if (ret != 2) |
304 | pr_debug("problem parsing cpuid\n" ); |
305 | } |
306 | |
307 | /* |
308 | * Find and assign the type number used for ibs_op or ibs_fetch samples. |
309 | * Device names can be large - we are only interested in the first 9 characters, |
310 | * to match "ibs_fetch". |
311 | */ |
312 | bool evlist__has_amd_ibs(struct evlist *evlist) |
313 | { |
314 | struct perf_env *env = evlist->env; |
315 | int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env); |
316 | const char *pmu_mapping = perf_env__pmu_mappings(env); |
317 | char name[sizeof("ibs_fetch" )]; |
318 | u32 type; |
319 | |
320 | while (nr_pmu_mappings--) { |
321 | ret = sscanf(pmu_mapping, "%u:%9s" , &type, name); |
322 | if (ret == 2) { |
323 | if (strstarts(str: name, prefix: "ibs_op" )) |
324 | ibs_op_type = type; |
325 | else if (strstarts(str: name, prefix: "ibs_fetch" )) |
326 | ibs_fetch_type = type; |
327 | } |
328 | pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; |
329 | } |
330 | |
331 | if (perf_env__find_pmu_cap(env, pmu_name: "ibs_op" , cap: "zen4_ibs_extensions" )) |
332 | zen4_ibs_extensions = 1; |
333 | |
334 | if (ibs_fetch_type || ibs_op_type) { |
335 | if (!cpu_family) |
336 | parse_cpuid(env); |
337 | return true; |
338 | } |
339 | |
340 | return false; |
341 | } |
342 | |