1// SPDX-License-Identifier: GPL-2.0
2/*
3 * AMD specific. Provide textual annotation for IBS raw sample data.
4 */
5
6#include <unistd.h>
7#include <stdio.h>
8#include <string.h>
9#include <inttypes.h>
10
11#include <linux/string.h>
12#include "../../arch/x86/include/asm/amd-ibs.h"
13
14#include "debug.h"
15#include "session.h"
16#include "evlist.h"
17#include "sample-raw.h"
18#include "util/sample.h"
19
20static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
21static bool zen4_ibs_extensions;
22
23static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
24{
25 const char * const ic_miss_strs[] = {
26 " IcMiss 0",
27 " IcMiss 1",
28 };
29 const char * const l1tlb_pgsz_strs[] = {
30 " L1TlbPgSz 4KB",
31 " L1TlbPgSz 2MB",
32 " L1TlbPgSz 1GB",
33 " L1TlbPgSz RESERVED"
34 };
35 const char * const l1tlb_pgsz_strs_erratum1347[] = {
36 " L1TlbPgSz 4KB",
37 " L1TlbPgSz 16KB",
38 " L1TlbPgSz 2MB",
39 " L1TlbPgSz 1GB"
40 };
41 const char *ic_miss_str = NULL;
42 const char *l1tlb_pgsz_str = NULL;
43 char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = "";
44
45 if (cpu_family == 0x19 && cpu_model < 0x10) {
46 /*
47 * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
48 * Erratum #1347 workaround is to use table provided in erratum
49 */
50 if (reg.phy_addr_valid)
51 l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
52 } else {
53 if (reg.phy_addr_valid)
54 l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
55 ic_miss_str = ic_miss_strs[reg.ic_miss];
56 }
57
58 if (zen4_ibs_extensions) {
59 snprintf(buf: l3_miss_str, size: sizeof(l3_miss_str),
60 fmt: " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d",
61 reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss);
62 }
63
64 printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
65 "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n",
66 reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
67 reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
68 reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
69 reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "",
70 l3_miss_str);
71}
72
73static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
74{
75 printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat);
76}
77
78static void pr_ibs_op_ctl(union ibs_op_ctl reg)
79{
80 char l3_miss_only[sizeof(" L3MissOnly _")] = "";
81
82 if (zen4_ibs_extensions)
83 snprintf(buf: l3_miss_only, size: sizeof(l3_miss_only), fmt: " L3MissOnly %d", reg.l3_miss_only);
84
85 printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n",
86 reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
87 reg.op_en, reg.op_val, reg.cnt_ctl,
88 reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
89}
90
91static void pr_ibs_op_data(union ibs_op_data reg)
92{
93 printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
94 " RipInvalid %d BrnFuse %d Microcode %d\n",
95 reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
96 reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "",
97 reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "",
98 reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "",
99 reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
100}
101
102static void pr_ibs_op_data2_extended(union ibs_op_data2 reg)
103{
104 static const char * const data_src_str[] = {
105 "",
106 " DataSrc 1=Local L3 or other L1/L2 in CCX",
107 " DataSrc 2=Another CCX cache in the same NUMA node",
108 " DataSrc 3=DRAM",
109 " DataSrc 4=(reserved)",
110 " DataSrc 5=Another CCX cache in a different NUMA node",
111 " DataSrc 6=Long-latency DIMM",
112 " DataSrc 7=MMIO/Config/PCI/APIC",
113 " DataSrc 8=Extension Memory",
114 " DataSrc 9=(reserved)",
115 " DataSrc 10=(reserved)",
116 " DataSrc 11=(reserved)",
117 " DataSrc 12=Coherent Memory of a different processor type",
118 /* 13 to 31 are reserved. Avoid printing them. */
119 };
120 int data_src = (reg.data_src_hi << 3) | reg.data_src_lo;
121
122 printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
123 (data_src == 1 || data_src == 2 || data_src == 5) ?
124 (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "",
125 reg.rmt_node,
126 data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : "");
127}
128
129static void pr_ibs_op_data2_default(union ibs_op_data2 reg)
130{
131 static const char * const data_src_str[] = {
132 "",
133 " DataSrc 1=(reserved)",
134 " DataSrc 2=Local node cache",
135 " DataSrc 3=DRAM",
136 " DataSrc 4=Remote node cache",
137 " DataSrc 5=(reserved)",
138 " DataSrc 6=(reserved)",
139 " DataSrc 7=Other"
140 };
141
142 printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
143 reg.data_src_lo == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State "
144 : "CacheHitSt 0=M-state ") : "",
145 reg.rmt_node, data_src_str[reg.data_src_lo]);
146}
147
148static void pr_ibs_op_data2(union ibs_op_data2 reg)
149{
150 if (zen4_ibs_extensions)
151 return pr_ibs_op_data2_extended(reg);
152 pr_ibs_op_data2_default(reg);
153}
154
155static void pr_ibs_op_data3(union ibs_op_data3 reg)
156{
157 char l2_miss_str[sizeof(" L2Miss _")] = "";
158 char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
159 char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
160
161 /*
162 * Erratum #1293
163 * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
164 */
165 if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
166 snprintf(buf: l2_miss_str, size: sizeof(l2_miss_str), fmt: " L2Miss %d", reg.l2_miss);
167 snprintf(buf: op_dc_miss_open_mem_reqs_str, size: sizeof(op_dc_miss_open_mem_reqs_str),
168 fmt: " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
169 }
170
171 if (reg.op_mem_width)
172 snprintf(buf: op_mem_width_str, size: sizeof(op_mem_width_str),
173 fmt: " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
174
175 printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
176 "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
177 "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
178 "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
179 reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
180 reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
181 reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
182 reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
183 reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
184 op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
185}
186
187/*
188 * IBS Op/Execution MSRs always saved, in order, are:
189 * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
190 * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
191 */
192static void amd_dump_ibs_op(struct perf_sample *sample)
193{
194 struct perf_ibs_data *data = sample->raw_data;
195 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
196 __u64 *rip = (__u64 *)op_ctl + 1;
197 union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
198 union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
199
200 pr_ibs_op_ctl(reg: *op_ctl);
201 if (!op_data->op_rip_invalid)
202 printf("IbsOpRip:\t%016llx\n", *rip);
203 pr_ibs_op_data(reg: *op_data);
204 /*
205 * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
206 */
207 if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
208 (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
209 pr_ibs_op_data2(reg: *(union ibs_op_data2 *)(rip + 2));
210 pr_ibs_op_data3(reg: *op_data3);
211 if (op_data3->dc_lin_addr_valid)
212 printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
213 if (op_data3->dc_phy_addr_valid)
214 printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
215 if (op_data->op_brn_ret && *(rip + 6))
216 printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
217}
218
219/*
220 * IBS Fetch MSRs always saved, in order, are:
221 * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
222 */
223static void amd_dump_ibs_fetch(struct perf_sample *sample)
224{
225 struct perf_ibs_data *data = sample->raw_data;
226 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
227 __u64 *addr = (__u64 *)fetch_ctl + 1;
228 union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
229
230 pr_ibs_fetch_ctl(reg: *fetch_ctl);
231 printf("IbsFetchLinAd:\t%016llx\n", *addr++);
232 if (fetch_ctl->phy_addr_valid)
233 printf("IbsFetchPhysAd:\t%016llx\n", *addr);
234 pr_ic_ibs_extd_ctl(reg: *extd_ctl);
235}
236
237/*
238 * Test for enable and valid bits in captured control MSRs.
239 */
240static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
241{
242 struct perf_ibs_data *data = sample->raw_data;
243 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
244
245 if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
246 return true;
247
248 return false;
249}
250
251static bool is_valid_ibs_op_sample(struct perf_sample *sample)
252{
253 struct perf_ibs_data *data = sample->raw_data;
254 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
255
256 if (op_ctl->op_en && op_ctl->op_val)
257 return true;
258
259 return false;
260}
261
262/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
263 * and if the event was triggered by IBS, display its raw data with decoded text.
264 * The function is only invoked when the dump flag -D is set.
265 */
266void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
267 struct perf_sample *sample)
268{
269 struct evsel *evsel;
270
271 if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
272 return;
273
274 evsel = evlist__event2evsel(evlist, event);
275 if (!evsel)
276 return;
277
278 if (evsel->core.attr.type == ibs_fetch_type) {
279 if (!is_valid_ibs_fetch_sample(sample)) {
280 pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
281 return;
282 }
283 amd_dump_ibs_fetch(sample);
284 } else if (evsel->core.attr.type == ibs_op_type) {
285 if (!is_valid_ibs_op_sample(sample)) {
286 pr_debug("Invalid raw IBS Op MSR data encountered\n");
287 return;
288 }
289 amd_dump_ibs_op(sample);
290 }
291}
292
293static void parse_cpuid(struct perf_env *env)
294{
295 const char *cpuid;
296 int ret;
297
298 cpuid = perf_env__cpuid(env);
299 /*
300 * cpuid = "AuthenticAMD,family,model,stepping"
301 */
302 ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
303 if (ret != 2)
304 pr_debug("problem parsing cpuid\n");
305}
306
307/*
308 * Find and assign the type number used for ibs_op or ibs_fetch samples.
309 * Device names can be large - we are only interested in the first 9 characters,
310 * to match "ibs_fetch".
311 */
312bool evlist__has_amd_ibs(struct evlist *evlist)
313{
314 struct perf_env *env = evlist->env;
315 int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
316 const char *pmu_mapping = perf_env__pmu_mappings(env);
317 char name[sizeof("ibs_fetch")];
318 u32 type;
319
320 while (nr_pmu_mappings--) {
321 ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
322 if (ret == 2) {
323 if (strstarts(str: name, prefix: "ibs_op"))
324 ibs_op_type = type;
325 else if (strstarts(str: name, prefix: "ibs_fetch"))
326 ibs_fetch_type = type;
327 }
328 pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
329 }
330
331 if (perf_env__find_pmu_cap(env, pmu_name: "ibs_op", cap: "zen4_ibs_extensions"))
332 zen4_ibs_extensions = 1;
333
334 if (ibs_fetch_type || ibs_op_type) {
335 if (!cpu_family)
336 parse_cpuid(env);
337 return true;
338 }
339
340 return false;
341}
342

/* Source: linux/tools/perf/util/amd-sample-raw.c */