1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Arm Statistical Profiling Extensions (SPE) support |
4 | * Copyright (c) 2017-2018, Arm Ltd. |
5 | */ |
6 | |
7 | #include <byteswap.h> |
8 | #include <endian.h> |
9 | #include <errno.h> |
10 | #include <inttypes.h> |
11 | #include <linux/bitops.h> |
12 | #include <linux/kernel.h> |
13 | #include <linux/log2.h> |
14 | #include <linux/types.h> |
15 | #include <linux/zalloc.h> |
16 | #include <stdlib.h> |
17 | #include <unistd.h> |
18 | |
19 | #include "auxtrace.h" |
20 | #include "color.h" |
21 | #include "debug.h" |
22 | #include "evlist.h" |
23 | #include "evsel.h" |
24 | #include "machine.h" |
25 | #include "session.h" |
26 | #include "symbol.h" |
27 | #include "thread.h" |
28 | #include "thread-stack.h" |
29 | #include "tsc.h" |
30 | #include "tool.h" |
31 | #include "util/synthetic-events.h" |
32 | |
33 | #include "arm-spe.h" |
34 | #include "arm-spe-decoder/arm-spe-decoder.h" |
35 | #include "arm-spe-decoder/arm-spe-pkt-decoder.h" |
36 | |
37 | #include "../../arch/arm64/include/asm/cputype.h" |
38 | #define MAX_TIMESTAMP (~0ULL) |
39 | |
/*
 * Per-session state for Arm SPE trace decoding.  One instance is created
 * per perf session and anchored in session->auxtrace.
 */
struct arm_spe {
	struct auxtrace auxtrace;		/* base auxtrace callbacks */
	struct auxtrace_queues queues;		/* one queue per CPU/thread */
	struct auxtrace_heap heap;		/* min-heap ordering queues by timestamp */
	struct itrace_synth_opts synth_opts;	/* user-selected synthesis options */
	u32 auxtrace_type;
	struct perf_session *session;
	struct machine *machine;
	u32 pmu_type;				/* attr.type of the SPE PMU */
	u64 midr;				/* CPU identification (MIDR_EL1) value */

	struct perf_tsc_conversion tc;		/* TSC <-> perf time conversion */

	u8 timeless_decoding;			/* no PERF_SAMPLE_TIME in any evsel */
	u8 data_queued;				/* trace data already split into queues */

	u64 sample_type;			/* sample_type used when injecting events */
	/* Which synthetic event classes were requested: */
	u8 sample_flc;				/* first-level cache */
	u8 sample_llc;				/* last-level cache */
	u8 sample_tlb;
	u8 sample_branch;
	u8 sample_remote_access;
	u8 sample_memory;
	u8 sample_instructions;
	u64 instructions_sample_period;

	/* Event IDs assigned to each synthesized event class: */
	u64 l1d_miss_id;
	u64 l1d_access_id;
	u64 llc_miss_id;
	u64 llc_access_id;
	u64 tlb_miss_id;
	u64 tlb_access_id;
	u64 branch_miss_id;
	u64 remote_access_id;
	u64 memory_id;
	u64 instructions_id;

	u64 kernel_start;			/* kernel/user split for cpumode */

	unsigned long num_events;
	u8 use_ctx_pkt_for_pid;			/* CONTEXT packets provide pid/tid */
};
82 | |
/* Per-queue (per-CPU or per-thread) decoding state. */
struct arm_spe_queue {
	struct arm_spe *spe;			/* back-pointer to session state */
	unsigned int queue_nr;			/* index into spe->queues */
	struct auxtrace_buffer *buffer;		/* buffer currently being decoded */
	struct auxtrace_buffer *old_buffer;	/* previous buffer, kept until drained */
	union perf_event *event_buf;		/* scratch space for synthesized events */
	bool on_heap;				/* queued on spe->heap for time ordering */
	bool done;
	pid_t pid;
	pid_t tid;
	int cpu;
	struct arm_spe_decoder *decoder;	/* packet decoder for this queue */
	u64 time;				/* perf time of last processed event */
	u64 timestamp;				/* TSC timestamp of last record */
	struct thread *thread;			/* cached thread for pid/tid */
	u64 period_instructions;		/* records since last instruction sample */
};
100 | |
101 | static void arm_spe_dump(struct arm_spe *spe __maybe_unused, |
102 | unsigned char *buf, size_t len) |
103 | { |
104 | struct arm_spe_pkt packet; |
105 | size_t pos = 0; |
106 | int ret, pkt_len, i; |
107 | char desc[ARM_SPE_PKT_DESC_MAX]; |
108 | const char *color = PERF_COLOR_BLUE; |
109 | |
110 | color_fprintf(stdout, color, |
111 | ". ... ARM SPE data: size %#zx bytes\n" , |
112 | len); |
113 | |
114 | while (len) { |
115 | ret = arm_spe_get_packet(buf, len, packet: &packet); |
116 | if (ret > 0) |
117 | pkt_len = ret; |
118 | else |
119 | pkt_len = 1; |
120 | printf("." ); |
121 | color_fprintf(stdout, color, " %08x: " , pos); |
122 | for (i = 0; i < pkt_len; i++) |
123 | color_fprintf(stdout, color, " %02x" , buf[i]); |
124 | for (; i < 16; i++) |
125 | color_fprintf(stdout, color, " " ); |
126 | if (ret > 0) { |
127 | ret = arm_spe_pkt_desc(packet: &packet, buf: desc, |
128 | ARM_SPE_PKT_DESC_MAX); |
129 | if (!ret) |
130 | color_fprintf(stdout, color, " %s\n" , desc); |
131 | } else { |
132 | color_fprintf(stdout, color, " Bad packet!\n" ); |
133 | } |
134 | pos += pkt_len; |
135 | buf += pkt_len; |
136 | len -= pkt_len; |
137 | } |
138 | } |
139 | |
/* Start a fresh dump block on its own line, then hex-dump the buffer. */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}
146 | |
/*
 * Decoder callback: hand the next chunk of raw SPE trace to the decoder.
 *
 * Walks the queue's auxtrace buffers, lazily mmap/reading buffer data from
 * the perf.data file, and keeps the previous buffer alive (old_buffer) until
 * a new non-empty one is ready so the decoder never sees freed memory.
 * Returns 0 with b->len == 0 when the queue is exhausted.
 */
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(data: speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		/* Now safe to release the buffer decoded previously. */
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		/* Empty buffer: discard it and recurse to fetch the next one. */
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}
191 | |
192 | static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, |
193 | unsigned int queue_nr) |
194 | { |
195 | struct arm_spe_params params = { .get_trace = 0, }; |
196 | struct arm_spe_queue *speq; |
197 | |
198 | speq = zalloc(sizeof(*speq)); |
199 | if (!speq) |
200 | return NULL; |
201 | |
202 | speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); |
203 | if (!speq->event_buf) |
204 | goto out_free; |
205 | |
206 | speq->spe = spe; |
207 | speq->queue_nr = queue_nr; |
208 | speq->pid = -1; |
209 | speq->tid = -1; |
210 | speq->cpu = -1; |
211 | speq->period_instructions = 0; |
212 | |
213 | /* params set */ |
214 | params.get_trace = arm_spe_get_trace; |
215 | params.data = speq; |
216 | |
217 | /* create new decoder */ |
218 | speq->decoder = arm_spe_decoder_new(params: ¶ms); |
219 | if (!speq->decoder) |
220 | goto out_free; |
221 | |
222 | return speq; |
223 | |
224 | out_free: |
225 | zfree(&speq->event_buf); |
226 | free(speq); |
227 | |
228 | return NULL; |
229 | } |
230 | |
231 | static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) |
232 | { |
233 | return ip >= spe->kernel_start ? |
234 | PERF_RECORD_MISC_KERNEL : |
235 | PERF_RECORD_MISC_USER; |
236 | } |
237 | |
/*
 * Refresh pid/tid/cpu bookkeeping for one queue.  The machine's notion of
 * the current tid on this CPU (kept up to date by context-switch events or
 * CONTEXT packets) takes precedence over the tid recorded for the queue.
 */
static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(machine: spe->machine, cpu: speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		/* tid changed: drop the cached thread so it is re-resolved. */
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(machine: spe->machine, pid: -1,
						    tid: speq->tid);
	}

	if (speq->thread) {
		speq->pid = thread__pid(thread: speq->thread);
		/* Per-thread queues carry no CPU; take it from the thread. */
		if (queue->cpu == -1)
			speq->cpu = thread__cpu(thread: speq->thread);
	}
}
262 | |
263 | static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) |
264 | { |
265 | struct arm_spe *spe = speq->spe; |
266 | int err = machine__set_current_tid(machine: spe->machine, cpu: speq->cpu, pid: -1, tid); |
267 | |
268 | if (err) |
269 | return err; |
270 | |
271 | arm_spe_set_pid_tid_cpu(spe, queue: &spe->queues.queue_array[speq->queue_nr]); |
272 | |
273 | return 0; |
274 | } |
275 | |
276 | static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record) |
277 | { |
278 | struct simd_flags simd_flags = {}; |
279 | |
280 | if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST)) |
281 | simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; |
282 | |
283 | if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER)) |
284 | simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; |
285 | |
286 | if (record->type & ARM_SPE_SVE_PARTIAL_PRED) |
287 | simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL; |
288 | |
289 | if (record->type & ARM_SPE_SVE_EMPTY_PRED) |
290 | simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY; |
291 | |
292 | return simd_flags; |
293 | } |
294 | |
/*
 * Fill the fields common to every synthesized SPE sample: time (converted
 * from the record's TSC timestamp when time ordering is in use), ip,
 * cpumode, pid/tid/cpu and SIMD flags, plus the perf event header.
 */
static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(cyc: record->timestamp, tc: &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, ip: sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;
	sample->simd_flags = arm_spe__synth_simd_flags(record);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}
317 | |
318 | static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) |
319 | { |
320 | event->header.size = perf_event__sample_event_size(sample, type, 0); |
321 | return perf_event__synthesize_sample(event, type, 0, sample); |
322 | } |
323 | |
324 | static inline int |
325 | arm_spe_deliver_synth_event(struct arm_spe *spe, |
326 | struct arm_spe_queue *speq __maybe_unused, |
327 | union perf_event *event, |
328 | struct perf_sample *sample) |
329 | { |
330 | int ret; |
331 | |
332 | if (spe->synth_opts.inject) { |
333 | ret = arm_spe__inject_event(event, sample, type: spe->sample_type); |
334 | if (ret) |
335 | return ret; |
336 | } |
337 | |
338 | ret = perf_session__deliver_synth_event(session: spe->session, event, sample); |
339 | if (ret) |
340 | pr_err("ARM SPE: failed to deliver event, error %d\n" , ret); |
341 | |
342 | return ret; |
343 | } |
344 | |
345 | static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, |
346 | u64 spe_events_id, u64 data_src) |
347 | { |
348 | struct arm_spe *spe = speq->spe; |
349 | struct arm_spe_record *record = &speq->decoder->record; |
350 | union perf_event *event = speq->event_buf; |
351 | struct perf_sample sample = { .ip = 0, }; |
352 | |
353 | arm_spe_prep_sample(spe, speq, event, sample: &sample); |
354 | |
355 | sample.id = spe_events_id; |
356 | sample.stream_id = spe_events_id; |
357 | sample.addr = record->virt_addr; |
358 | sample.phys_addr = record->phys_addr; |
359 | sample.data_src = data_src; |
360 | sample.weight = record->latency; |
361 | |
362 | return arm_spe_deliver_synth_event(spe, speq, event, sample: &sample); |
363 | } |
364 | |
365 | static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, |
366 | u64 spe_events_id) |
367 | { |
368 | struct arm_spe *spe = speq->spe; |
369 | struct arm_spe_record *record = &speq->decoder->record; |
370 | union perf_event *event = speq->event_buf; |
371 | struct perf_sample sample = { .ip = 0, }; |
372 | |
373 | arm_spe_prep_sample(spe, speq, event, sample: &sample); |
374 | |
375 | sample.id = spe_events_id; |
376 | sample.stream_id = spe_events_id; |
377 | sample.addr = record->to_ip; |
378 | sample.weight = record->latency; |
379 | |
380 | return arm_spe_deliver_synth_event(spe, speq, event, sample: &sample); |
381 | } |
382 | |
/*
 * Synthesize one instruction sample, honouring the configured sampling
 * period: a sample is emitted only every instructions_sample_period
 * decoded records, with sample.period reflecting the records it covers.
 */
static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
					     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	/*
	 * Handles perf instruction sampling period.
	 */
	speq->period_instructions++;
	if (speq->period_instructions < spe->instructions_sample_period)
		return 0;
	speq->period_instructions = 0;

	arm_spe_prep_sample(spe, speq, event, sample: &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;
	sample.period = spe->instructions_sample_period;
	sample.weight = record->latency;

	return arm_spe_deliver_synth_event(spe, speq, event, sample: &sample);
}
411 | |
/*
 * CPUs whose SPE implementation reports an extended data source encoding
 * (decoded by arm_spe__synth_data_source_neoverse()).
 */
static const struct midr_range neoverse_spe[] = {
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
	{},
};
418 | |
/*
 * Translate the Neoverse-specific SPE data source encoding into perf's
 * generic perf_mem_data_src cache level / snoop fields.
 */
static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
						union perf_mem_data_src *data_src)
{
	/*
	 * Even though four levels of cache hierarchy are possible, no known
	 * production Neoverse systems currently include more than three levels
	 * so for the time being we assume three exist. If a production system
	 * is built with four the this function would have to be changed to
	 * detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_NV_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_NV_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_NV_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know if this is L1, L2 but we do know it was a cache-2-cache
	 * transfer, so set SNOOPX_PEER
	 */
	case ARM_SPE_NV_LOCAL_CLUSTER:
	case ARM_SPE_NV_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * System cache is assumed to be L3
	 */
	case ARM_SPE_NV_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except it came from the other
	 * socket
	 */
	case ARM_SPE_NV_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_NV_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		/* Unknown source encoding: leave data_src untouched. */
		break;
	}
}
494 | |
495 | static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record, |
496 | union perf_mem_data_src *data_src) |
497 | { |
498 | if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { |
499 | data_src->mem_lvl = PERF_MEM_LVL_L3; |
500 | |
501 | if (record->type & ARM_SPE_LLC_MISS) |
502 | data_src->mem_lvl |= PERF_MEM_LVL_MISS; |
503 | else |
504 | data_src->mem_lvl |= PERF_MEM_LVL_HIT; |
505 | } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) { |
506 | data_src->mem_lvl = PERF_MEM_LVL_L1; |
507 | |
508 | if (record->type & ARM_SPE_L1D_MISS) |
509 | data_src->mem_lvl |= PERF_MEM_LVL_MISS; |
510 | else |
511 | data_src->mem_lvl |= PERF_MEM_LVL_HIT; |
512 | } |
513 | |
514 | if (record->type & ARM_SPE_REMOTE_ACCESS) |
515 | data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; |
516 | } |
517 | |
/*
 * Build a perf_mem_data_src value for the record.  Returns 0 when the record
 * is not a load or store (callers use this to skip memory samples).  The
 * cache-level decoding depends on whether the CPU is a known Neoverse part.
 */
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
	union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
	bool is_neoverse = is_midr_in_range_list(midr, ranges: neoverse_spe);

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return 0;

	if (is_neoverse)
		arm_spe__synth_data_source_neoverse(record, data_src: &data_src);
	else
		arm_spe__synth_data_source_generic(record, data_src: &data_src);

	/* TLB walk information is architectural, common to both paths. */
	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}
546 | |
/*
 * Synthesize all requested perf samples for the current decoder record.
 * A single record can match several event classes (e.g. an L1D miss that is
 * also a TLB access), so each enabled class is checked independently.
 */
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(record, midr: spe->midr);

	/* First-level cache events */
	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe_events_id: spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe_events_id: spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	/* Last-level cache events */
	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe_events_id: spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe_events_id: spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	/* TLB events */
	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe_events_id: spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe_events_id: spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	/* Mispredicted branches */
	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe_events_id: spe->branch_miss_id);
		if (err)
			return err;
	}

	/* Cross-socket accesses */
	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe_events_id: spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/*
	 * When data_src is zero it means the record is not a memory operation,
	 * skip to synthesize memory sample for this case.
	 */
	if (spe->sample_memory && data_src) {
		err = arm_spe__synth_mem_sample(speq, spe_events_id: spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe_events_id: spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}
636 | |
/*
 * Drive the decoder for one queue: synthesize samples for the pending
 * record, then decode the next one, until either the data is exhausted
 * (returns 1) or, in timeful mode, the queue's timestamp catches up with
 * *timestamp (returns 0, with *timestamp updated so the caller can re-heap
 * the queue).  Negative return values are errors from sample synthesis.
 */
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(machine: spe->machine);

	while (1) {
		/*
		 * The usual logic is firstly to decode the packets, and then
		 * based the record to synthesize sample; but here the flow is
		 * reversed: it calls arm_spe_sample() for synthesizing samples
		 * prior to arm_spe_decode().
		 *
		 * Two reasons for this code logic:
		 * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
		 * has decoded trace data and generated a record, but the record
		 * is left to generate sample until run to here, so it's correct
		 * to synthesize sample for the left record.
		 * 2. After decoding trace data, it needs to compare the record
		 * timestamp with the coming perf event, if the record timestamp
		 * is later than the perf event, it needs bail out and pushs the
		 * record into auxtrace heap, thus the record can be deferred to
		 * synthesize sample until run to here at the next time; so this
		 * can correlate samples between Arm SPE trace data and other
		 * perf events with correct time ordering.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, tid: record->context_id);
			if (ret)
				return ret;

			/* CONTEXT packets are present: trust them for pids. */
			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(decoder: speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n" );
			return 1;
		}

		/*
		 * Error is detected when decode SPE trace data, continue to
		 * the next trace data and find out more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than timestamp of the
		 * coming perf event, bail out so can allow the perf event to
		 * be processed ahead.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}
715 | |
/*
 * Lazily attach decoding state to an auxtrace queue.  In timeful mode the
 * first record is decoded immediately so the queue can be placed on the
 * timestamp-ordered heap; the record itself is sampled later by
 * arm_spe_run_decoder() (see the comment there).
 */
static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	/* Nothing to do for empty queues or ones already set up. */
	if (list_empty(head: &queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(decoder: speq->decoder);

		/* No data in this queue. */
		if (!ret)
			return 0;

		/* Decode error: skip and try the next record. */
		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}
762 | |
763 | static int arm_spe__setup_queues(struct arm_spe *spe) |
764 | { |
765 | unsigned int i; |
766 | int ret; |
767 | |
768 | for (i = 0; i < spe->queues.nr_queues; i++) { |
769 | ret = arm_spe__setup_queue(spe, queue: &spe->queues.queue_array[i], queue_nr: i); |
770 | if (ret) |
771 | return ret; |
772 | } |
773 | |
774 | return 0; |
775 | } |
776 | |
777 | static int arm_spe__update_queues(struct arm_spe *spe) |
778 | { |
779 | if (spe->queues.new_data) { |
780 | spe->queues.new_data = false; |
781 | return arm_spe__setup_queues(spe); |
782 | } |
783 | |
784 | return 0; |
785 | } |
786 | |
787 | static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) |
788 | { |
789 | struct evsel *evsel; |
790 | struct evlist *evlist = spe->session->evlist; |
791 | bool timeless_decoding = true; |
792 | |
793 | /* |
794 | * Circle through the list of event and complain if we find one |
795 | * with the time bit set. |
796 | */ |
797 | evlist__for_each_entry(evlist, evsel) { |
798 | if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) |
799 | timeless_decoding = false; |
800 | } |
801 | |
802 | return timeless_decoding; |
803 | } |
804 | |
/*
 * Process all queues whose next record is older than @timestamp, in global
 * time order via the auxtrace heap.  Each queue is popped, decoded up to
 * the limit, and pushed back with its new timestamp unless exhausted.
 */
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		/* The oldest pending record is not due yet. */
		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		/*
		 * Decode only up to the next queue's timestamp (+1) so queues
		 * stay interleaved in time order, capped at @timestamp.
		 */
		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the machine's context, so
		 * here we need to update the pid/tid in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, timestamp: &ts);
		if (ret < 0) {
			/* Re-heap before bailing so the queue is not lost. */
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			/* Limit reached: requeue with the updated timestamp. */
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			/* Queue exhausted (decoder returned 1). */
			speq->on_heap = false;
		}
	}

	return 0;
}
859 | |
860 | static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, |
861 | u64 time_) |
862 | { |
863 | struct auxtrace_queues *queues = &spe->queues; |
864 | unsigned int i; |
865 | u64 ts = 0; |
866 | |
867 | for (i = 0; i < queues->nr_queues; i++) { |
868 | struct auxtrace_queue *queue = &spe->queues.queue_array[i]; |
869 | struct arm_spe_queue *speq = queue->priv; |
870 | |
871 | if (speq && (tid == -1 || speq->tid == tid)) { |
872 | speq->time = time_; |
873 | arm_spe_set_pid_tid_cpu(spe, queue); |
874 | arm_spe_run_decoder(speq, timestamp: &ts); |
875 | } |
876 | } |
877 | return 0; |
878 | } |
879 | |
880 | static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, |
881 | struct perf_sample *sample) |
882 | { |
883 | pid_t pid, tid; |
884 | int cpu; |
885 | |
886 | if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)) |
887 | return 0; |
888 | |
889 | pid = event->context_switch.next_prev_pid; |
890 | tid = event->context_switch.next_prev_tid; |
891 | cpu = sample->cpu; |
892 | |
893 | if (tid == -1) |
894 | pr_warning("context_switch event has no tid\n" ); |
895 | |
896 | return machine__set_current_tid(machine: spe->machine, cpu, pid, tid); |
897 | } |
898 | |
/*
 * Session callback invoked for every (ordered) perf event.  Drives SPE
 * decoding up to the event's timestamp so synthesized samples interleave
 * correctly with other events; in timeless mode queues are flushed per-task
 * on PERF_RECORD_EXIT instead.
 */
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n" );
		return -EINVAL;
	}

	/* Convert the perf time back to the SPE timestamp domain. */
	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(ns: sample->time, tc: &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					tid: event->fork.tid,
					time_: sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		/* Without CONTEXT packets, fall back to switch events for pids. */
		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		    event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}
947 | |
/*
 * Session callback for PERF_RECORD_AUXTRACE events: queue the raw SPE data
 * for later decoding.  When reading from a file the data stays on disk and
 * only its offset is recorded; piped input is copied into memory.
 */
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
			auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(data: session->data);
		int err;

		if (perf_data__is_pipe(data: session->data)) {
			data_offset = 0;
		} else {
			/* Trace data follows the event at the current offset. */
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
				data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buf: buffer->data,
						   len: buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}
986 | |
987 | static int arm_spe_flush(struct perf_session *session __maybe_unused, |
988 | struct perf_tool *tool __maybe_unused) |
989 | { |
990 | struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, |
991 | auxtrace); |
992 | int ret; |
993 | |
994 | if (dump_trace) |
995 | return 0; |
996 | |
997 | if (!tool->ordered_events) |
998 | return -EINVAL; |
999 | |
1000 | ret = arm_spe__update_queues(spe); |
1001 | if (ret < 0) |
1002 | return ret; |
1003 | |
1004 | if (spe->timeless_decoding) |
1005 | return arm_spe_process_timeless_queues(spe, tid: -1, |
1006 | MAX_TIMESTAMP - 1); |
1007 | |
1008 | ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); |
1009 | if (ret) |
1010 | return ret; |
1011 | |
1012 | if (!spe->use_ctx_pkt_for_pid) |
1013 | ui__warning(format: "Arm SPE CONTEXT packets not found in the traces.\n" |
1014 | "Matching of TIDs to SPE events could be inaccurate.\n" ); |
1015 | |
1016 | return 0; |
1017 | } |
1018 | |
1019 | static void arm_spe_free_queue(void *priv) |
1020 | { |
1021 | struct arm_spe_queue *speq = priv; |
1022 | |
1023 | if (!speq) |
1024 | return; |
1025 | thread__zput(speq->thread); |
1026 | arm_spe_decoder_free(decoder: speq->decoder); |
1027 | zfree(&speq->event_buf); |
1028 | free(speq); |
1029 | } |
1030 | |
1031 | static void arm_spe_free_events(struct perf_session *session) |
1032 | { |
1033 | struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, |
1034 | auxtrace); |
1035 | struct auxtrace_queues *queues = &spe->queues; |
1036 | unsigned int i; |
1037 | |
1038 | for (i = 0; i < queues->nr_queues; i++) { |
1039 | arm_spe_free_queue(priv: queues->queue_array[i].priv); |
1040 | queues->queue_array[i].priv = NULL; |
1041 | } |
1042 | auxtrace_queues__free(queues); |
1043 | } |
1044 | |
1045 | static void arm_spe_free(struct perf_session *session) |
1046 | { |
1047 | struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, |
1048 | auxtrace); |
1049 | |
1050 | auxtrace_heap__free(&spe->heap); |
1051 | arm_spe_free_events(session); |
1052 | session->auxtrace = NULL; |
1053 | free(spe); |
1054 | } |
1055 | |
1056 | static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, |
1057 | struct evsel *evsel) |
1058 | { |
1059 | struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); |
1060 | |
1061 | return evsel->core.attr.type == spe->pmu_type; |
1062 | } |
1063 | |
/* printf formats for dumping the AUXTRACE_INFO private array, indexed by field */
static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};
1067 | |
1068 | static void arm_spe_print_info(__u64 *arr) |
1069 | { |
1070 | if (!dump_trace) |
1071 | return; |
1072 | |
1073 | fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); |
1074 | } |
1075 | |
/*
 * Glue used when synthesizing attribute events: carries the session so the
 * synthesized event can be delivered back through it (see
 * arm_spe_event_synth(), which recovers it via container_of).
 */
struct arm_spe_synth {
	struct perf_tool dummy_tool;	/* passed to perf_event__synthesize_attr() */
	struct perf_session *session;	/* destination for the synthesized event */
};
1080 | |
1081 | static int arm_spe_event_synth(struct perf_tool *tool, |
1082 | union perf_event *event, |
1083 | struct perf_sample *sample __maybe_unused, |
1084 | struct machine *machine __maybe_unused) |
1085 | { |
1086 | struct arm_spe_synth *arm_spe_synth = |
1087 | container_of(tool, struct arm_spe_synth, dummy_tool); |
1088 | |
1089 | return perf_session__deliver_synth_event(session: arm_spe_synth->session, |
1090 | event, NULL); |
1091 | } |
1092 | |
1093 | static int arm_spe_synth_event(struct perf_session *session, |
1094 | struct perf_event_attr *attr, u64 id) |
1095 | { |
1096 | struct arm_spe_synth arm_spe_synth; |
1097 | |
1098 | memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth)); |
1099 | arm_spe_synth.session = session; |
1100 | |
1101 | return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1, |
1102 | &id, arm_spe_event_synth); |
1103 | } |
1104 | |
1105 | static void arm_spe_set_event_name(struct evlist *evlist, u64 id, |
1106 | const char *name) |
1107 | { |
1108 | struct evsel *evsel; |
1109 | |
1110 | evlist__for_each_entry(evlist, evsel) { |
1111 | if (evsel->core.id && evsel->core.id[0] == id) { |
1112 | if (evsel->name) |
1113 | zfree(&evsel->name); |
1114 | evsel->name = strdup(name); |
1115 | break; |
1116 | } |
1117 | } |
1118 | } |
1119 | |
1120 | static int |
1121 | arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) |
1122 | { |
1123 | struct evlist *evlist = session->evlist; |
1124 | struct evsel *evsel; |
1125 | struct perf_event_attr attr; |
1126 | bool found = false; |
1127 | u64 id; |
1128 | int err; |
1129 | |
1130 | evlist__for_each_entry(evlist, evsel) { |
1131 | if (evsel->core.attr.type == spe->pmu_type) { |
1132 | found = true; |
1133 | break; |
1134 | } |
1135 | } |
1136 | |
1137 | if (!found) { |
1138 | pr_debug("No selected events with SPE trace data\n" ); |
1139 | return 0; |
1140 | } |
1141 | |
1142 | memset(&attr, 0, sizeof(struct perf_event_attr)); |
1143 | attr.size = sizeof(struct perf_event_attr); |
1144 | attr.type = PERF_TYPE_HARDWARE; |
1145 | attr.sample_type = evsel->core.attr.sample_type & |
1146 | (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR); |
1147 | attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | |
1148 | PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | |
1149 | PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR; |
1150 | if (spe->timeless_decoding) |
1151 | attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; |
1152 | else |
1153 | attr.sample_type |= PERF_SAMPLE_TIME; |
1154 | |
1155 | spe->sample_type = attr.sample_type; |
1156 | |
1157 | attr.exclude_user = evsel->core.attr.exclude_user; |
1158 | attr.exclude_kernel = evsel->core.attr.exclude_kernel; |
1159 | attr.exclude_hv = evsel->core.attr.exclude_hv; |
1160 | attr.exclude_host = evsel->core.attr.exclude_host; |
1161 | attr.exclude_guest = evsel->core.attr.exclude_guest; |
1162 | attr.sample_id_all = evsel->core.attr.sample_id_all; |
1163 | attr.read_format = evsel->core.attr.read_format; |
1164 | |
1165 | /* create new id val to be a fixed offset from evsel id */ |
1166 | id = evsel->core.id[0] + 1000000000; |
1167 | |
1168 | if (!id) |
1169 | id = 1; |
1170 | |
1171 | if (spe->synth_opts.flc) { |
1172 | spe->sample_flc = true; |
1173 | |
1174 | /* Level 1 data cache miss */ |
1175 | err = arm_spe_synth_event(session, attr: &attr, id); |
1176 | if (err) |
1177 | return err; |
1178 | spe->l1d_miss_id = id; |
1179 | arm_spe_set_event_name(evlist, id, name: "l1d-miss" ); |
1180 | id += 1; |
1181 | |
1182 | /* Level 1 data cache access */ |
1183 | err = arm_spe_synth_event(session, attr: &attr, id); |
1184 | if (err) |
1185 | return err; |
1186 | spe->l1d_access_id = id; |
1187 | arm_spe_set_event_name(evlist, id, name: "l1d-access" ); |
1188 | id += 1; |
1189 | } |
1190 | |
1191 | if (spe->synth_opts.llc) { |
1192 | spe->sample_llc = true; |
1193 | |
1194 | /* Last level cache miss */ |
1195 | err = arm_spe_synth_event(session, attr: &attr, id); |
1196 | if (err) |
1197 | return err; |
1198 | spe->llc_miss_id = id; |
1199 | arm_spe_set_event_name(evlist, id, name: "llc-miss" ); |
1200 | id += 1; |
1201 | |
1202 | /* Last level cache access */ |
1203 | err = arm_spe_synth_event(session, attr: &attr, id); |
1204 | if (err) |
1205 | return err; |
1206 | spe->llc_access_id = id; |
1207 | arm_spe_set_event_name(evlist, id, name: "llc-access" ); |
1208 | id += 1; |
1209 | } |
1210 | |
1211 | if (spe->synth_opts.tlb) { |
1212 | spe->sample_tlb = true; |
1213 | |
1214 | /* TLB miss */ |
1215 | err = arm_spe_synth_event(session, attr: &attr, id); |
1216 | if (err) |
1217 | return err; |
1218 | spe->tlb_miss_id = id; |
1219 | arm_spe_set_event_name(evlist, id, name: "tlb-miss" ); |
1220 | id += 1; |
1221 | |
1222 | /* TLB access */ |
1223 | err = arm_spe_synth_event(session, attr: &attr, id); |
1224 | if (err) |
1225 | return err; |
1226 | spe->tlb_access_id = id; |
1227 | arm_spe_set_event_name(evlist, id, name: "tlb-access" ); |
1228 | id += 1; |
1229 | } |
1230 | |
1231 | if (spe->synth_opts.branches) { |
1232 | spe->sample_branch = true; |
1233 | |
1234 | /* Branch miss */ |
1235 | err = arm_spe_synth_event(session, attr: &attr, id); |
1236 | if (err) |
1237 | return err; |
1238 | spe->branch_miss_id = id; |
1239 | arm_spe_set_event_name(evlist, id, name: "branch-miss" ); |
1240 | id += 1; |
1241 | } |
1242 | |
1243 | if (spe->synth_opts.remote_access) { |
1244 | spe->sample_remote_access = true; |
1245 | |
1246 | /* Remote access */ |
1247 | err = arm_spe_synth_event(session, attr: &attr, id); |
1248 | if (err) |
1249 | return err; |
1250 | spe->remote_access_id = id; |
1251 | arm_spe_set_event_name(evlist, id, name: "remote-access" ); |
1252 | id += 1; |
1253 | } |
1254 | |
1255 | if (spe->synth_opts.mem) { |
1256 | spe->sample_memory = true; |
1257 | |
1258 | err = arm_spe_synth_event(session, attr: &attr, id); |
1259 | if (err) |
1260 | return err; |
1261 | spe->memory_id = id; |
1262 | arm_spe_set_event_name(evlist, id, name: "memory" ); |
1263 | id += 1; |
1264 | } |
1265 | |
1266 | if (spe->synth_opts.instructions) { |
1267 | if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { |
1268 | pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n" ); |
1269 | goto synth_instructions_out; |
1270 | } |
1271 | if (spe->synth_opts.period > 1) |
1272 | pr_warning("Arm SPE has a hardware-based sample period.\n" |
1273 | "Additional instruction events will be discarded by --itrace\n" ); |
1274 | |
1275 | spe->sample_instructions = true; |
1276 | attr.config = PERF_COUNT_HW_INSTRUCTIONS; |
1277 | attr.sample_period = spe->synth_opts.period; |
1278 | spe->instructions_sample_period = attr.sample_period; |
1279 | err = arm_spe_synth_event(session, attr: &attr, id); |
1280 | if (err) |
1281 | return err; |
1282 | spe->instructions_id = id; |
1283 | arm_spe_set_event_name(evlist, id, name: "instructions" ); |
1284 | } |
1285 | synth_instructions_out: |
1286 | |
1287 | return 0; |
1288 | } |
1289 | |
1290 | int arm_spe_process_auxtrace_info(union perf_event *event, |
1291 | struct perf_session *session) |
1292 | { |
1293 | struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; |
1294 | size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; |
1295 | struct perf_record_time_conv *tc = &session->time_conv; |
1296 | const char *cpuid = perf_env__cpuid(env: session->evlist->env); |
1297 | u64 midr = strtol(cpuid, NULL, 16); |
1298 | struct arm_spe *spe; |
1299 | int err; |
1300 | |
1301 | if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + |
1302 | min_sz) |
1303 | return -EINVAL; |
1304 | |
1305 | spe = zalloc(sizeof(struct arm_spe)); |
1306 | if (!spe) |
1307 | return -ENOMEM; |
1308 | |
1309 | err = auxtrace_queues__init(&spe->queues); |
1310 | if (err) |
1311 | goto err_free; |
1312 | |
1313 | spe->session = session; |
1314 | spe->machine = &session->machines.host; /* No kvm support */ |
1315 | spe->auxtrace_type = auxtrace_info->type; |
1316 | spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; |
1317 | spe->midr = midr; |
1318 | |
1319 | spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); |
1320 | |
1321 | /* |
1322 | * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead |
1323 | * and the parameters for hardware clock are stored in the session |
1324 | * context. Passes these parameters to the struct perf_tsc_conversion |
1325 | * in "spe->tc", which is used for later conversion between clock |
1326 | * counter and timestamp. |
1327 | * |
1328 | * For backward compatibility, copies the fields starting from |
1329 | * "time_cycles" only if they are contained in the event. |
1330 | */ |
1331 | spe->tc.time_shift = tc->time_shift; |
1332 | spe->tc.time_mult = tc->time_mult; |
1333 | spe->tc.time_zero = tc->time_zero; |
1334 | |
1335 | if (event_contains(*tc, time_cycles)) { |
1336 | spe->tc.time_cycles = tc->time_cycles; |
1337 | spe->tc.time_mask = tc->time_mask; |
1338 | spe->tc.cap_user_time_zero = tc->cap_user_time_zero; |
1339 | spe->tc.cap_user_time_short = tc->cap_user_time_short; |
1340 | } |
1341 | |
1342 | spe->auxtrace.process_event = arm_spe_process_event; |
1343 | spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; |
1344 | spe->auxtrace.flush_events = arm_spe_flush; |
1345 | spe->auxtrace.free_events = arm_spe_free_events; |
1346 | spe->auxtrace.free = arm_spe_free; |
1347 | spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; |
1348 | session->auxtrace = &spe->auxtrace; |
1349 | |
1350 | arm_spe_print_info(arr: &auxtrace_info->priv[0]); |
1351 | |
1352 | if (dump_trace) |
1353 | return 0; |
1354 | |
1355 | if (session->itrace_synth_opts && session->itrace_synth_opts->set) |
1356 | spe->synth_opts = *session->itrace_synth_opts; |
1357 | else |
1358 | itrace_synth_opts__set_default(&spe->synth_opts, false); |
1359 | |
1360 | err = arm_spe_synth_events(spe, session); |
1361 | if (err) |
1362 | goto err_free_queues; |
1363 | |
1364 | err = auxtrace_queues__process_index(&spe->queues, session); |
1365 | if (err) |
1366 | goto err_free_queues; |
1367 | |
1368 | if (spe->queues.populated) |
1369 | spe->data_queued = true; |
1370 | |
1371 | return 0; |
1372 | |
1373 | err_free_queues: |
1374 | auxtrace_queues__free(&spe->queues); |
1375 | session->auxtrace = NULL; |
1376 | err_free: |
1377 | free(spe); |
1378 | return err; |
1379 | } |
1380 | |