1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright(C) 2015-2018 Linaro Limited. |
4 | * |
5 | * Author: Tor Jeremiassen <tor@ti.com> |
6 | * Author: Mathieu Poirier <mathieu.poirier@linaro.org> |
7 | */ |
8 | |
9 | #include <linux/kernel.h> |
10 | #include <linux/bitfield.h> |
11 | #include <linux/bitops.h> |
12 | #include <linux/coresight-pmu.h> |
13 | #include <linux/err.h> |
14 | #include <linux/log2.h> |
15 | #include <linux/types.h> |
16 | #include <linux/zalloc.h> |
17 | |
18 | #include <stdlib.h> |
19 | |
20 | #include "auxtrace.h" |
21 | #include "color.h" |
22 | #include "cs-etm.h" |
23 | #include "cs-etm-decoder/cs-etm-decoder.h" |
24 | #include "debug.h" |
25 | #include "dso.h" |
26 | #include "evlist.h" |
27 | #include "intlist.h" |
28 | #include "machine.h" |
29 | #include "map.h" |
30 | #include "perf.h" |
31 | #include "session.h" |
32 | #include "map_symbol.h" |
33 | #include "branch.h" |
34 | #include "symbol.h" |
35 | #include "tool.h" |
36 | #include "thread.h" |
37 | #include "thread-stack.h" |
38 | #include "tsc.h" |
39 | #include <tools/libc_compat.h> |
40 | #include "util/synthetic-events.h" |
41 | #include "util/util.h" |
42 | |
/*
 * Per-session CoreSight ETM decoding state.
 *
 * The generic 'auxtrace' member is embedded so that the session's auxtrace
 * pointer can be converted back to this structure with container_of().
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	/* queue_array[i].priv of each queue holds a struct cs_etm_queue */
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	/* Selects which samples (branches, instructions, last branch) are synthesized */
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless has no timestamps in the trace so overlapping mmap lookups
	 * are less accurate but produces smaller trace data. We use context IDs
	 * in the trace instead of matching timestamps with fork records so
	 * they're not really needed in the general case. Overlapping mmaps
	 * happen in cases like between a fork and an exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread ignores the trace channel ID and instead assumes that
	 * everything in a buffer comes from the same process regardless of
	 * which CPU it ran on. It also implies no context IDs so the TID is
	 * taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	/* Number of entries in the 'metadata' array */
	int num_cpu;
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	/*
	 * Array of 'num_cpu' metadata blocks; each block is itself an array of
	 * u64 indexed with the CS_ETM_* / CS_ETMV4_* / CS_ETE_* constants.
	 */
	u64 **metadata;
	/* Compared against evsel->core.attr.type to recognise auxtrace evsels */
	unsigned int pmu_type;
	/* Cached PID format, see cs_etm__init_pid_fmt() */
	enum cs_etm_pid_fmt pid_fmt;
};
83 | |
/*
 * Decoding state for a single trace channel ID within a cs_etm_queue.
 */
struct cs_etm_traceid_queue {
	/* Trace channel ID this queue was created for */
	u8 trace_chan_id;
	u64 period_instructions;
	size_t last_branch_pos;
	/* Buffer of PERF_SAMPLE_MAX_SIZE bytes, allocated when the queue is set up */
	union perf_event *event_buf;
	/* Thread references: released with thread__zput() when the queue is freed */
	struct thread *thread;
	struct thread *prev_packet_thread;
	/* Exception levels tracked alongside prev_packet and packet */
	ocsd_ex_level prev_packet_el;
	ocsd_ex_level el;
	/* Only allocated when synth_opts.last_branch is set */
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	/* Swapped with each other by cs_etm__packet_swap() */
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;
};
99 | |
/*
 * Per auxtrace queue decoding state. One cs_etm_queue can carry several
 * trace channel IDs, each with its own cs_etm_traceid_queue.
 */
struct cs_etm_queue {
	/* Back pointer to the owning session state */
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	/* Index of this queue in etm->queues.queue_array */
	unsigned int queue_nr;
	/* Channel ID with a timestamp waiting to be serviced; 0 means none */
	u8 pending_timestamp_chan_id;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	/* Array of traceid queue pointers, grown by one entry per new traceID */
	struct cs_etm_traceid_queue **traceid_queues;
};
113 | |
/*
 * RB tree for quick conversion between traceID and metadata pointers.
 * Each node is keyed by trace channel ID and its ->priv points at the
 * metadata block of the CPU that owns that ID.
 */
static struct intlist *traceid_list;

/* Forward declarations for helpers defined further down in this file */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
122 | |
/* A PTM is identified by ETMIDR bits [11:8] reading b0011 */
#define ETMIDR_PTM_VERSION	(0x3 << 8)
125 | |
/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with. One option is to modify the auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "a | b") are evaluated as a unit before being shifted or masked.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
	((queue_nr) << 16 | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
136 | |
137 | static u32 cs_etm__get_v7_protocol_version(u32 etmidr) |
138 | { |
139 | etmidr &= ETMIDR_PTM_VERSION; |
140 | |
141 | if (etmidr == ETMIDR_PTM_VERSION) |
142 | return CS_ETM_PROTO_PTM; |
143 | |
144 | return CS_ETM_PROTO_ETMV3; |
145 | } |
146 | |
147 | static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) |
148 | { |
149 | struct int_node *inode; |
150 | u64 *metadata; |
151 | |
152 | inode = intlist__find(ilist: traceid_list, i: trace_chan_id); |
153 | if (!inode) |
154 | return -EINVAL; |
155 | |
156 | metadata = inode->priv; |
157 | *magic = metadata[CS_ETM_MAGIC]; |
158 | return 0; |
159 | } |
160 | |
161 | int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) |
162 | { |
163 | struct int_node *inode; |
164 | u64 *metadata; |
165 | |
166 | inode = intlist__find(ilist: traceid_list, i: trace_chan_id); |
167 | if (!inode) |
168 | return -EINVAL; |
169 | |
170 | metadata = inode->priv; |
171 | *cpu = (int)metadata[CS_ETM_CPU]; |
172 | return 0; |
173 | } |
174 | |
175 | /* |
176 | * The returned PID format is presented as an enum: |
177 | * |
178 | * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced. |
179 | * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. |
180 | * CS_ETM_PIDFMT_NONE: No context IDs |
181 | * |
182 | * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 |
183 | * are enabled at the same time when the session runs on an EL2 kernel. |
184 | * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be |
185 | * recorded in the trace data, the tool will selectively use |
186 | * CONTEXTIDR_EL2 as PID. |
187 | * |
188 | * The result is cached in etm->pid_fmt so this function only needs to be called |
189 | * when processing the aux info. |
190 | */ |
191 | static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) |
192 | { |
193 | u64 val; |
194 | |
195 | if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { |
196 | val = metadata[CS_ETM_ETMCR]; |
197 | /* CONTEXTIDR is traced */ |
198 | if (val & BIT(ETM_OPT_CTXTID)) |
199 | return CS_ETM_PIDFMT_CTXTID; |
200 | } else { |
201 | val = metadata[CS_ETMV4_TRCCONFIGR]; |
202 | /* CONTEXTIDR_EL2 is traced */ |
203 | if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) |
204 | return CS_ETM_PIDFMT_CTXTID2; |
205 | /* CONTEXTIDR_EL1 is traced */ |
206 | else if (val & BIT(ETM4_CFG_BIT_CTXTID)) |
207 | return CS_ETM_PIDFMT_CTXTID; |
208 | } |
209 | |
210 | return CS_ETM_PIDFMT_NONE; |
211 | } |
212 | |
213 | enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq) |
214 | { |
215 | return etmq->etm->pid_fmt; |
216 | } |
217 | |
218 | static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata) |
219 | { |
220 | struct int_node *inode; |
221 | |
222 | /* Get an RB node for this CPU */ |
223 | inode = intlist__findnew(ilist: traceid_list, i: trace_chan_id); |
224 | |
225 | /* Something went wrong, no need to continue */ |
226 | if (!inode) |
227 | return -ENOMEM; |
228 | |
229 | /* |
230 | * The node for that CPU should not be taken. |
231 | * Back out if that's the case. |
232 | */ |
233 | if (inode->priv) |
234 | return -EINVAL; |
235 | |
236 | /* All good, associate the traceID with the metadata pointer */ |
237 | inode->priv = cpu_metadata; |
238 | |
239 | return 0; |
240 | } |
241 | |
242 | static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) |
243 | { |
244 | u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; |
245 | |
246 | switch (cs_etm_magic) { |
247 | case __perf_cs_etmv3_magic: |
248 | *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] & |
249 | CORESIGHT_TRACE_ID_VAL_MASK); |
250 | break; |
251 | case __perf_cs_etmv4_magic: |
252 | case __perf_cs_ete_magic: |
253 | *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] & |
254 | CORESIGHT_TRACE_ID_VAL_MASK); |
255 | break; |
256 | default: |
257 | return -EINVAL; |
258 | } |
259 | return 0; |
260 | } |
261 | |
262 | /* |
263 | * update metadata trace ID from the value found in the AUX_HW_INFO packet. |
264 | * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present. |
265 | */ |
266 | static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) |
267 | { |
268 | u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; |
269 | |
270 | switch (cs_etm_magic) { |
271 | case __perf_cs_etmv3_magic: |
272 | cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id; |
273 | break; |
274 | case __perf_cs_etmv4_magic: |
275 | case __perf_cs_ete_magic: |
276 | cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id; |
277 | break; |
278 | |
279 | default: |
280 | return -EINVAL; |
281 | } |
282 | return 0; |
283 | } |
284 | |
285 | /* |
286 | * Get a metadata index for a specific cpu from an array. |
287 | * |
288 | */ |
289 | static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu) |
290 | { |
291 | int i; |
292 | |
293 | for (i = 0; i < etm->num_cpu; i++) { |
294 | if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) { |
295 | return i; |
296 | } |
297 | } |
298 | |
299 | return -1; |
300 | } |
301 | |
302 | /* |
303 | * Get a metadata for a specific cpu from an array. |
304 | * |
305 | */ |
306 | static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu) |
307 | { |
308 | int idx = get_cpu_data_idx(etm, cpu); |
309 | |
310 | return (idx != -1) ? etm->metadata[idx] : NULL; |
311 | } |
312 | |
313 | /* |
314 | * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. |
315 | * |
316 | * The payload associates the Trace ID and the CPU. |
317 | * The routine is tolerant of seeing multiple packets with the same association, |
318 | * but a CPU / Trace ID association changing during a session is an error. |
319 | */ |
320 | static int cs_etm__process_aux_output_hw_id(struct perf_session *session, |
321 | union perf_event *event) |
322 | { |
323 | struct cs_etm_auxtrace *etm; |
324 | struct perf_sample sample; |
325 | struct int_node *inode; |
326 | struct evsel *evsel; |
327 | u64 *cpu_data; |
328 | u64 hw_id; |
329 | int cpu, version, err; |
330 | u8 trace_chan_id, curr_chan_id; |
331 | |
332 | /* extract and parse the HW ID */ |
333 | hw_id = event->aux_output_hw_id.hw_id; |
334 | version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id); |
335 | trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); |
336 | |
337 | /* check that we can handle this version */ |
338 | if (version > CS_AUX_HW_ID_CURR_VERSION) |
339 | return -EINVAL; |
340 | |
341 | /* get access to the etm metadata */ |
342 | etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); |
343 | if (!etm || !etm->metadata) |
344 | return -EINVAL; |
345 | |
346 | /* parse the sample to get the CPU */ |
347 | evsel = evlist__event2evsel(evlist: session->evlist, event); |
348 | if (!evsel) |
349 | return -EINVAL; |
350 | err = evsel__parse_sample(evsel, event, sample: &sample); |
351 | if (err) |
352 | return err; |
353 | cpu = sample.cpu; |
354 | if (cpu == -1) { |
355 | /* no CPU in the sample - possibly recorded with an old version of perf */ |
356 | pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record." ); |
357 | return -EINVAL; |
358 | } |
359 | |
360 | /* See if the ID is mapped to a CPU, and it matches the current CPU */ |
361 | inode = intlist__find(ilist: traceid_list, i: trace_chan_id); |
362 | if (inode) { |
363 | cpu_data = inode->priv; |
364 | if ((int)cpu_data[CS_ETM_CPU] != cpu) { |
365 | pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n" ); |
366 | return -EINVAL; |
367 | } |
368 | |
369 | /* check that the mapped ID matches */ |
370 | err = cs_etm__metadata_get_trace_id(trace_chan_id: &curr_chan_id, cpu_metadata: cpu_data); |
371 | if (err) |
372 | return err; |
373 | if (curr_chan_id != trace_chan_id) { |
374 | pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n" ); |
375 | return -EINVAL; |
376 | } |
377 | |
378 | /* mapped and matched - return OK */ |
379 | return 0; |
380 | } |
381 | |
382 | cpu_data = get_cpu_data(etm, cpu); |
383 | if (cpu_data == NULL) |
384 | return err; |
385 | |
386 | /* not one we've seen before - lets map it */ |
387 | err = cs_etm__map_trace_id(trace_chan_id, cpu_metadata: cpu_data); |
388 | if (err) |
389 | return err; |
390 | |
391 | /* |
392 | * if we are picking up the association from the packet, need to plug |
393 | * the correct trace ID into the metadata for setting up decoders later. |
394 | */ |
395 | err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_metadata: cpu_data); |
396 | return err; |
397 | } |
398 | |
399 | void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, |
400 | u8 trace_chan_id) |
401 | { |
402 | /* |
403 | * When a timestamp packet is encountered the backend code |
404 | * is stopped so that the front end has time to process packets |
405 | * that were accumulated in the traceID queue. Since there can |
406 | * be more than one channel per cs_etm_queue, we need to specify |
407 | * what traceID queue needs servicing. |
408 | */ |
409 | etmq->pending_timestamp_chan_id = trace_chan_id; |
410 | } |
411 | |
412 | static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, |
413 | u8 *trace_chan_id) |
414 | { |
415 | struct cs_etm_packet_queue *packet_queue; |
416 | |
417 | if (!etmq->pending_timestamp_chan_id) |
418 | return 0; |
419 | |
420 | if (trace_chan_id) |
421 | *trace_chan_id = etmq->pending_timestamp_chan_id; |
422 | |
423 | packet_queue = cs_etm__etmq_get_packet_queue(etmq, |
424 | etmq->pending_timestamp_chan_id); |
425 | if (!packet_queue) |
426 | return 0; |
427 | |
428 | /* Acknowledge pending status */ |
429 | etmq->pending_timestamp_chan_id = 0; |
430 | |
431 | /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ |
432 | return packet_queue->cs_timestamp; |
433 | } |
434 | |
435 | static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) |
436 | { |
437 | int i; |
438 | |
439 | queue->head = 0; |
440 | queue->tail = 0; |
441 | queue->packet_count = 0; |
442 | for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { |
443 | queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; |
444 | queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; |
445 | queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; |
446 | queue->packet_buffer[i].instr_count = 0; |
447 | queue->packet_buffer[i].last_instr_taken_branch = false; |
448 | queue->packet_buffer[i].last_instr_size = 0; |
449 | queue->packet_buffer[i].last_instr_type = 0; |
450 | queue->packet_buffer[i].last_instr_subtype = 0; |
451 | queue->packet_buffer[i].last_instr_cond = 0; |
452 | queue->packet_buffer[i].flags = 0; |
453 | queue->packet_buffer[i].exception_number = UINT32_MAX; |
454 | queue->packet_buffer[i].trace_chan_id = UINT8_MAX; |
455 | queue->packet_buffer[i].cpu = INT_MIN; |
456 | } |
457 | } |
458 | |
459 | static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) |
460 | { |
461 | int idx; |
462 | struct int_node *inode; |
463 | struct cs_etm_traceid_queue *tidq; |
464 | struct intlist *traceid_queues_list = etmq->traceid_queues_list; |
465 | |
466 | intlist__for_each_entry(inode, traceid_queues_list) { |
467 | idx = (int)(intptr_t)inode->priv; |
468 | tidq = etmq->traceid_queues[idx]; |
469 | cs_etm__clear_packet_queue(queue: &tidq->packet_queue); |
470 | } |
471 | } |
472 | |
473 | static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, |
474 | struct cs_etm_traceid_queue *tidq, |
475 | u8 trace_chan_id) |
476 | { |
477 | int rc = -ENOMEM; |
478 | struct auxtrace_queue *queue; |
479 | struct cs_etm_auxtrace *etm = etmq->etm; |
480 | |
481 | cs_etm__clear_packet_queue(queue: &tidq->packet_queue); |
482 | |
483 | queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; |
484 | tidq->trace_chan_id = trace_chan_id; |
485 | tidq->el = tidq->prev_packet_el = ocsd_EL_unknown; |
486 | tidq->thread = machine__findnew_thread(machine: &etm->session->machines.host, pid: -1, |
487 | tid: queue->tid); |
488 | tidq->prev_packet_thread = machine__idle_thread(machine: &etm->session->machines.host); |
489 | |
490 | tidq->packet = zalloc(sizeof(struct cs_etm_packet)); |
491 | if (!tidq->packet) |
492 | goto out; |
493 | |
494 | tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); |
495 | if (!tidq->prev_packet) |
496 | goto out_free; |
497 | |
498 | if (etm->synth_opts.last_branch) { |
499 | size_t sz = sizeof(struct branch_stack); |
500 | |
501 | sz += etm->synth_opts.last_branch_sz * |
502 | sizeof(struct branch_entry); |
503 | tidq->last_branch = zalloc(sz); |
504 | if (!tidq->last_branch) |
505 | goto out_free; |
506 | tidq->last_branch_rb = zalloc(sz); |
507 | if (!tidq->last_branch_rb) |
508 | goto out_free; |
509 | } |
510 | |
511 | tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); |
512 | if (!tidq->event_buf) |
513 | goto out_free; |
514 | |
515 | return 0; |
516 | |
517 | out_free: |
518 | zfree(&tidq->last_branch_rb); |
519 | zfree(&tidq->last_branch); |
520 | zfree(&tidq->prev_packet); |
521 | zfree(&tidq->packet); |
522 | out: |
523 | return rc; |
524 | } |
525 | |
526 | static struct cs_etm_traceid_queue |
527 | *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) |
528 | { |
529 | int idx; |
530 | struct int_node *inode; |
531 | struct intlist *traceid_queues_list; |
532 | struct cs_etm_traceid_queue *tidq, **traceid_queues; |
533 | struct cs_etm_auxtrace *etm = etmq->etm; |
534 | |
535 | if (etm->per_thread_decoding) |
536 | trace_chan_id = CS_ETM_PER_THREAD_TRACEID; |
537 | |
538 | traceid_queues_list = etmq->traceid_queues_list; |
539 | |
540 | /* |
541 | * Check if the traceid_queue exist for this traceID by looking |
542 | * in the queue list. |
543 | */ |
544 | inode = intlist__find(ilist: traceid_queues_list, i: trace_chan_id); |
545 | if (inode) { |
546 | idx = (int)(intptr_t)inode->priv; |
547 | return etmq->traceid_queues[idx]; |
548 | } |
549 | |
550 | /* We couldn't find a traceid_queue for this traceID, allocate one */ |
551 | tidq = malloc(sizeof(*tidq)); |
552 | if (!tidq) |
553 | return NULL; |
554 | |
555 | memset(tidq, 0, sizeof(*tidq)); |
556 | |
557 | /* Get a valid index for the new traceid_queue */ |
558 | idx = intlist__nr_entries(ilist: traceid_queues_list); |
559 | /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */ |
560 | inode = intlist__findnew(ilist: traceid_queues_list, i: trace_chan_id); |
561 | if (!inode) |
562 | goto out_free; |
563 | |
564 | /* Associate this traceID with this index */ |
565 | inode->priv = (void *)(intptr_t)idx; |
566 | |
567 | if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) |
568 | goto out_free; |
569 | |
570 | /* Grow the traceid_queues array by one unit */ |
571 | traceid_queues = etmq->traceid_queues; |
572 | traceid_queues = reallocarray(traceid_queues, |
573 | idx + 1, |
574 | sizeof(*traceid_queues)); |
575 | |
576 | /* |
577 | * On failure reallocarray() returns NULL and the original block of |
578 | * memory is left untouched. |
579 | */ |
580 | if (!traceid_queues) |
581 | goto out_free; |
582 | |
583 | traceid_queues[idx] = tidq; |
584 | etmq->traceid_queues = traceid_queues; |
585 | |
586 | return etmq->traceid_queues[idx]; |
587 | |
588 | out_free: |
589 | /* |
590 | * Function intlist__remove() removes the inode from the list |
591 | * and delete the memory associated to it. |
592 | */ |
593 | intlist__remove(ilist: traceid_queues_list, in: inode); |
594 | free(tidq); |
595 | |
596 | return NULL; |
597 | } |
598 | |
599 | struct cs_etm_packet_queue |
600 | *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) |
601 | { |
602 | struct cs_etm_traceid_queue *tidq; |
603 | |
604 | tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); |
605 | if (tidq) |
606 | return &tidq->packet_queue; |
607 | |
608 | return NULL; |
609 | } |
610 | |
611 | static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, |
612 | struct cs_etm_traceid_queue *tidq) |
613 | { |
614 | struct cs_etm_packet *tmp; |
615 | |
616 | if (etm->synth_opts.branches || etm->synth_opts.last_branch || |
617 | etm->synth_opts.instructions) { |
618 | /* |
619 | * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for |
620 | * the next incoming packet. |
621 | * |
622 | * Threads and exception levels are also tracked for both the |
623 | * previous and current packets. This is because the previous |
624 | * packet is used for the 'from' IP for branch samples, so the |
625 | * thread at that time must also be assigned to that sample. |
626 | * Across discontinuity packets the thread can change, so by |
627 | * tracking the thread for the previous packet the branch sample |
628 | * will have the correct info. |
629 | */ |
630 | tmp = tidq->packet; |
631 | tidq->packet = tidq->prev_packet; |
632 | tidq->prev_packet = tmp; |
633 | tidq->prev_packet_el = tidq->el; |
634 | thread__put(thread: tidq->prev_packet_thread); |
635 | tidq->prev_packet_thread = thread__get(thread: tidq->thread); |
636 | } |
637 | } |
638 | |
639 | static void cs_etm__packet_dump(const char *pkt_string) |
640 | { |
641 | const char *color = PERF_COLOR_BLUE; |
642 | int len = strlen(pkt_string); |
643 | |
644 | if (len && (pkt_string[len-1] == '\n')) |
645 | color_fprintf(stdout, color, " %s" , pkt_string); |
646 | else |
647 | color_fprintf(stdout, color, " %s\n" , pkt_string); |
648 | |
649 | fflush(stdout); |
650 | } |
651 | |
652 | static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, |
653 | struct cs_etm_auxtrace *etm, int t_idx, |
654 | int m_idx, u32 etmidr) |
655 | { |
656 | u64 **metadata = etm->metadata; |
657 | |
658 | t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr); |
659 | t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR]; |
660 | t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR]; |
661 | } |
662 | |
663 | static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, |
664 | struct cs_etm_auxtrace *etm, int t_idx, |
665 | int m_idx) |
666 | { |
667 | u64 **metadata = etm->metadata; |
668 | |
669 | t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i; |
670 | t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0]; |
671 | t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1]; |
672 | t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2]; |
673 | t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8]; |
674 | t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR]; |
675 | t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR]; |
676 | } |
677 | |
678 | static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, |
679 | struct cs_etm_auxtrace *etm, int t_idx, |
680 | int m_idx) |
681 | { |
682 | u64 **metadata = etm->metadata; |
683 | |
684 | t_params[t_idx].protocol = CS_ETM_PROTO_ETE; |
685 | t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0]; |
686 | t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1]; |
687 | t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2]; |
688 | t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8]; |
689 | t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR]; |
690 | t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR]; |
691 | t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH]; |
692 | } |
693 | |
694 | static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, |
695 | struct cs_etm_auxtrace *etm, |
696 | bool formatted, |
697 | int sample_cpu, |
698 | int decoders) |
699 | { |
700 | int t_idx, m_idx; |
701 | u32 etmidr; |
702 | u64 architecture; |
703 | |
704 | for (t_idx = 0; t_idx < decoders; t_idx++) { |
705 | if (formatted) |
706 | m_idx = t_idx; |
707 | else { |
708 | m_idx = get_cpu_data_idx(etm, cpu: sample_cpu); |
709 | if (m_idx == -1) { |
710 | pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n" ); |
711 | m_idx = 0; |
712 | } |
713 | } |
714 | |
715 | architecture = etm->metadata[m_idx][CS_ETM_MAGIC]; |
716 | |
717 | switch (architecture) { |
718 | case __perf_cs_etmv3_magic: |
719 | etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR]; |
720 | cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr); |
721 | break; |
722 | case __perf_cs_etmv4_magic: |
723 | cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx); |
724 | break; |
725 | case __perf_cs_ete_magic: |
726 | cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx); |
727 | break; |
728 | default: |
729 | return -EINVAL; |
730 | } |
731 | } |
732 | |
733 | return 0; |
734 | } |
735 | |
736 | static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, |
737 | struct cs_etm_queue *etmq, |
738 | enum cs_etm_decoder_operation mode, |
739 | bool formatted) |
740 | { |
741 | int ret = -EINVAL; |
742 | |
743 | if (!(mode < CS_ETM_OPERATION_MAX)) |
744 | goto out; |
745 | |
746 | d_params->packet_printer = cs_etm__packet_dump; |
747 | d_params->operation = mode; |
748 | d_params->data = etmq; |
749 | d_params->formatted = formatted; |
750 | d_params->fsyncs = false; |
751 | d_params->hsyncs = false; |
752 | d_params->frame_aligned = true; |
753 | |
754 | ret = 0; |
755 | out: |
756 | return ret; |
757 | } |
758 | |
759 | static void cs_etm__dump_event(struct cs_etm_queue *etmq, |
760 | struct auxtrace_buffer *buffer) |
761 | { |
762 | int ret; |
763 | const char *color = PERF_COLOR_BLUE; |
764 | size_t buffer_used = 0; |
765 | |
766 | fprintf(stdout, "\n" ); |
767 | color_fprintf(stdout, color, |
768 | ". ... CoreSight %s Trace data: size %#zx bytes\n" , |
769 | cs_etm_decoder__get_name(etmq->decoder), buffer->size); |
770 | |
771 | do { |
772 | size_t consumed; |
773 | |
774 | ret = cs_etm_decoder__process_data_block( |
775 | decoder: etmq->decoder, indx: buffer->offset, |
776 | buf: &((u8 *)buffer->data)[buffer_used], |
777 | len: buffer->size - buffer_used, consumed: &consumed); |
778 | if (ret) |
779 | break; |
780 | |
781 | buffer_used += consumed; |
782 | } while (buffer_used < buffer->size); |
783 | |
784 | cs_etm_decoder__reset(decoder: etmq->decoder); |
785 | } |
786 | |
787 | static int cs_etm__flush_events(struct perf_session *session, |
788 | struct perf_tool *tool) |
789 | { |
790 | struct cs_etm_auxtrace *etm = container_of(session->auxtrace, |
791 | struct cs_etm_auxtrace, |
792 | auxtrace); |
793 | if (dump_trace) |
794 | return 0; |
795 | |
796 | if (!tool->ordered_events) |
797 | return -EINVAL; |
798 | |
799 | if (etm->timeless_decoding) { |
800 | /* |
801 | * Pass tid = -1 to process all queues. But likely they will have |
802 | * already been processed on PERF_RECORD_EXIT anyway. |
803 | */ |
804 | return cs_etm__process_timeless_queues(etm, tid: -1); |
805 | } |
806 | |
807 | return cs_etm__process_timestamped_queues(etm); |
808 | } |
809 | |
810 | static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) |
811 | { |
812 | int idx; |
813 | uintptr_t priv; |
814 | struct int_node *inode, *tmp; |
815 | struct cs_etm_traceid_queue *tidq; |
816 | struct intlist *traceid_queues_list = etmq->traceid_queues_list; |
817 | |
818 | intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { |
819 | priv = (uintptr_t)inode->priv; |
820 | idx = priv; |
821 | |
822 | /* Free this traceid_queue from the array */ |
823 | tidq = etmq->traceid_queues[idx]; |
824 | thread__zput(tidq->thread); |
825 | thread__zput(tidq->prev_packet_thread); |
826 | zfree(&tidq->event_buf); |
827 | zfree(&tidq->last_branch); |
828 | zfree(&tidq->last_branch_rb); |
829 | zfree(&tidq->prev_packet); |
830 | zfree(&tidq->packet); |
831 | zfree(&tidq); |
832 | |
833 | /* |
834 | * Function intlist__remove() removes the inode from the list |
835 | * and delete the memory associated to it. |
836 | */ |
837 | intlist__remove(ilist: traceid_queues_list, in: inode); |
838 | } |
839 | |
840 | /* Then the RB tree itself */ |
841 | intlist__delete(ilist: traceid_queues_list); |
842 | etmq->traceid_queues_list = NULL; |
843 | |
844 | /* finally free the traceid_queues array */ |
845 | zfree(&etmq->traceid_queues); |
846 | } |
847 | |
848 | static void cs_etm__free_queue(void *priv) |
849 | { |
850 | struct cs_etm_queue *etmq = priv; |
851 | |
852 | if (!etmq) |
853 | return; |
854 | |
855 | cs_etm_decoder__free(decoder: etmq->decoder); |
856 | cs_etm__free_traceid_queues(etmq); |
857 | free(etmq); |
858 | } |
859 | |
860 | static void cs_etm__free_events(struct perf_session *session) |
861 | { |
862 | unsigned int i; |
863 | struct cs_etm_auxtrace *aux = container_of(session->auxtrace, |
864 | struct cs_etm_auxtrace, |
865 | auxtrace); |
866 | struct auxtrace_queues *queues = &aux->queues; |
867 | |
868 | for (i = 0; i < queues->nr_queues; i++) { |
869 | cs_etm__free_queue(priv: queues->queue_array[i].priv); |
870 | queues->queue_array[i].priv = NULL; |
871 | } |
872 | |
873 | auxtrace_queues__free(queues); |
874 | } |
875 | |
876 | static void cs_etm__free(struct perf_session *session) |
877 | { |
878 | int i; |
879 | struct int_node *inode, *tmp; |
880 | struct cs_etm_auxtrace *aux = container_of(session->auxtrace, |
881 | struct cs_etm_auxtrace, |
882 | auxtrace); |
883 | cs_etm__free_events(session); |
884 | session->auxtrace = NULL; |
885 | |
886 | /* First remove all traceID/metadata nodes for the RB tree */ |
887 | intlist__for_each_entry_safe(inode, tmp, traceid_list) |
888 | intlist__remove(ilist: traceid_list, in: inode); |
889 | /* Then the RB tree itself */ |
890 | intlist__delete(ilist: traceid_list); |
891 | |
892 | for (i = 0; i < aux->num_cpu; i++) |
893 | zfree(&aux->metadata[i]); |
894 | |
895 | zfree(&aux->metadata); |
896 | zfree(&aux); |
897 | } |
898 | |
899 | static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, |
900 | struct evsel *evsel) |
901 | { |
902 | struct cs_etm_auxtrace *aux = container_of(session->auxtrace, |
903 | struct cs_etm_auxtrace, |
904 | auxtrace); |
905 | |
906 | return evsel->core.attr.type == aux->pmu_type; |
907 | } |
908 | |
909 | static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq, |
910 | ocsd_ex_level el) |
911 | { |
912 | enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq); |
913 | |
914 | /* |
915 | * For any virtualisation based on nVHE (e.g. pKVM), or host kernels |
916 | * running at EL1 assume everything is the host. |
917 | */ |
918 | if (pid_fmt == CS_ETM_PIDFMT_CTXTID) |
919 | return &etmq->etm->session->machines.host; |
920 | |
921 | /* |
922 | * Not perfect, but otherwise assume anything in EL1 is the default |
923 | * guest, and everything else is the host. Distinguishing between guest |
924 | * and host userspaces isn't currently supported either. Neither is |
925 | * multiple guest support. All this does is reduce the likeliness of |
926 | * decode errors where we look into the host kernel maps when it should |
927 | * have been the guest maps. |
928 | */ |
929 | switch (el) { |
930 | case ocsd_EL1: |
931 | return machines__find_guest(machines: &etmq->etm->session->machines, |
932 | DEFAULT_GUEST_KERNEL_ID); |
933 | case ocsd_EL3: |
934 | case ocsd_EL2: |
935 | case ocsd_EL0: |
936 | case ocsd_EL_unknown: |
937 | default: |
938 | return &etmq->etm->session->machines.host; |
939 | } |
940 | } |
941 | |
942 | static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address, |
943 | ocsd_ex_level el) |
944 | { |
945 | struct machine *machine = cs_etm__get_machine(etmq, el); |
946 | |
947 | if (address >= machine__kernel_start(machine)) { |
948 | if (machine__is_host(machine)) |
949 | return PERF_RECORD_MISC_KERNEL; |
950 | else |
951 | return PERF_RECORD_MISC_GUEST_KERNEL; |
952 | } else { |
953 | if (machine__is_host(machine)) |
954 | return PERF_RECORD_MISC_USER; |
955 | else { |
956 | /* |
957 | * Can't really happen at the moment because |
958 | * cs_etm__get_machine() will always return |
959 | * machines.host for any non EL1 trace. |
960 | */ |
961 | return PERF_RECORD_MISC_GUEST_USER; |
962 | } |
963 | } |
964 | } |
965 | |
966 | static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, |
967 | u64 address, size_t size, u8 *buffer, |
968 | const ocsd_mem_space_acc_t mem_space) |
969 | { |
970 | u8 cpumode; |
971 | u64 offset; |
972 | int len; |
973 | struct addr_location al; |
974 | struct dso *dso; |
975 | struct cs_etm_traceid_queue *tidq; |
976 | int ret = 0; |
977 | |
978 | if (!etmq) |
979 | return 0; |
980 | |
981 | addr_location__init(al: &al); |
982 | tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); |
983 | if (!tidq) |
984 | goto out; |
985 | |
986 | /* |
987 | * We've already tracked EL along side the PID in cs_etm__set_thread() |
988 | * so double check that it matches what OpenCSD thinks as well. It |
989 | * doesn't distinguish between EL0 and EL1 for this mem access callback |
990 | * so we had to do the extra tracking. Skip validation if it's any of |
991 | * the 'any' values. |
992 | */ |
993 | if (!(mem_space == OCSD_MEM_SPACE_ANY || |
994 | mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) { |
995 | if (mem_space & OCSD_MEM_SPACE_EL1N) { |
996 | /* Includes both non secure EL1 and EL0 */ |
997 | assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0); |
998 | } else if (mem_space & OCSD_MEM_SPACE_EL2) |
999 | assert(tidq->el == ocsd_EL2); |
1000 | else if (mem_space & OCSD_MEM_SPACE_EL3) |
1001 | assert(tidq->el == ocsd_EL3); |
1002 | } |
1003 | |
1004 | cpumode = cs_etm__cpu_mode(etmq, address, tidq->el); |
1005 | |
1006 | if (!thread__find_map(thread: tidq->thread, cpumode, addr: address, al: &al)) |
1007 | goto out; |
1008 | |
1009 | dso = map__dso(map: al.map); |
1010 | if (!dso) |
1011 | goto out; |
1012 | |
1013 | if (dso->data.status == DSO_DATA_STATUS_ERROR && |
1014 | dso__data_status_seen(dso, by: DSO_DATA_STATUS_SEEN_ITRACE)) |
1015 | goto out; |
1016 | |
1017 | offset = map__map_ip(map: al.map, ip_or_rip: address); |
1018 | |
1019 | map__load(map: al.map); |
1020 | |
1021 | len = dso__data_read_offset(dso, machine: maps__machine(maps: thread__maps(thread: tidq->thread)), |
1022 | offset, data: buffer, size); |
1023 | |
1024 | if (len <= 0) { |
1025 | ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" |
1026 | " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n" ); |
1027 | if (!dso->auxtrace_warned) { |
1028 | pr_err("CS ETM Trace: Debug data not found for address %#" PRIx64" in %s\n" , |
1029 | address, |
1030 | dso->long_name ? dso->long_name : "Unknown" ); |
1031 | dso->auxtrace_warned = true; |
1032 | } |
1033 | goto out; |
1034 | } |
1035 | ret = len; |
1036 | out: |
1037 | addr_location__exit(al: &al); |
1038 | return ret; |
1039 | } |
1040 | |
1041 | static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, |
1042 | bool formatted, int sample_cpu) |
1043 | { |
1044 | struct cs_etm_decoder_params d_params; |
1045 | struct cs_etm_trace_params *t_params = NULL; |
1046 | struct cs_etm_queue *etmq; |
1047 | /* |
1048 | * Each queue can only contain data from one CPU when unformatted, so only one decoder is |
1049 | * needed. |
1050 | */ |
1051 | int decoders = formatted ? etm->num_cpu : 1; |
1052 | |
1053 | etmq = zalloc(sizeof(*etmq)); |
1054 | if (!etmq) |
1055 | return NULL; |
1056 | |
1057 | etmq->traceid_queues_list = intlist__new(NULL); |
1058 | if (!etmq->traceid_queues_list) |
1059 | goto out_free; |
1060 | |
1061 | /* Use metadata to fill in trace parameters for trace decoder */ |
1062 | t_params = zalloc(sizeof(*t_params) * decoders); |
1063 | |
1064 | if (!t_params) |
1065 | goto out_free; |
1066 | |
1067 | if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders)) |
1068 | goto out_free; |
1069 | |
1070 | /* Set decoder parameters to decode trace packets */ |
1071 | if (cs_etm__init_decoder_params(d_params: &d_params, etmq, |
1072 | mode: dump_trace ? CS_ETM_OPERATION_PRINT : |
1073 | CS_ETM_OPERATION_DECODE, |
1074 | formatted)) |
1075 | goto out_free; |
1076 | |
1077 | etmq->decoder = cs_etm_decoder__new(num_cpu: decoders, d_params: &d_params, |
1078 | t_params); |
1079 | |
1080 | if (!etmq->decoder) |
1081 | goto out_free; |
1082 | |
1083 | /* |
1084 | * Register a function to handle all memory accesses required by |
1085 | * the trace decoder library. |
1086 | */ |
1087 | if (cs_etm_decoder__add_mem_access_cb(decoder: etmq->decoder, |
1088 | start: 0x0L, end: ((u64) -1L), |
1089 | cb_func: cs_etm__mem_access)) |
1090 | goto out_free_decoder; |
1091 | |
1092 | zfree(&t_params); |
1093 | return etmq; |
1094 | |
1095 | out_free_decoder: |
1096 | cs_etm_decoder__free(decoder: etmq->decoder); |
1097 | out_free: |
1098 | intlist__delete(ilist: etmq->traceid_queues_list); |
1099 | free(etmq); |
1100 | |
1101 | return NULL; |
1102 | } |
1103 | |
1104 | static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, |
1105 | struct auxtrace_queue *queue, |
1106 | unsigned int queue_nr, |
1107 | bool formatted, |
1108 | int sample_cpu) |
1109 | { |
1110 | struct cs_etm_queue *etmq = queue->priv; |
1111 | |
1112 | if (list_empty(head: &queue->head) || etmq) |
1113 | return 0; |
1114 | |
1115 | etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu); |
1116 | |
1117 | if (!etmq) |
1118 | return -ENOMEM; |
1119 | |
1120 | queue->priv = etmq; |
1121 | etmq->etm = etm; |
1122 | etmq->queue_nr = queue_nr; |
1123 | etmq->offset = 0; |
1124 | |
1125 | return 0; |
1126 | } |
1127 | |
1128 | static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, |
1129 | struct cs_etm_queue *etmq, |
1130 | unsigned int queue_nr) |
1131 | { |
1132 | int ret = 0; |
1133 | unsigned int cs_queue_nr; |
1134 | u8 trace_chan_id; |
1135 | u64 cs_timestamp; |
1136 | |
1137 | /* |
1138 | * We are under a CPU-wide trace scenario. As such we need to know |
1139 | * when the code that generated the traces started to execute so that |
1140 | * it can be correlated with execution on other CPUs. So we get a |
1141 | * handle on the beginning of traces and decode until we find a |
1142 | * timestamp. The timestamp is then added to the auxtrace min heap |
1143 | * in order to know what nibble (of all the etmqs) to decode first. |
1144 | */ |
1145 | while (1) { |
1146 | /* |
1147 | * Fetch an aux_buffer from this etmq. Bail if no more |
1148 | * blocks or an error has been encountered. |
1149 | */ |
1150 | ret = cs_etm__get_data_block(etmq); |
1151 | if (ret <= 0) |
1152 | goto out; |
1153 | |
1154 | /* |
1155 | * Run decoder on the trace block. The decoder will stop when |
1156 | * encountering a CS timestamp, a full packet queue or the end of |
1157 | * trace for that block. |
1158 | */ |
1159 | ret = cs_etm__decode_data_block(etmq); |
1160 | if (ret) |
1161 | goto out; |
1162 | |
1163 | /* |
1164 | * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all |
1165 | * the timestamp calculation for us. |
1166 | */ |
1167 | cs_timestamp = cs_etm__etmq_get_timestamp(etmq, trace_chan_id: &trace_chan_id); |
1168 | |
1169 | /* We found a timestamp, no need to continue. */ |
1170 | if (cs_timestamp) |
1171 | break; |
1172 | |
1173 | /* |
1174 | * We didn't find a timestamp so empty all the traceid packet |
1175 | * queues before looking for another timestamp packet, either |
1176 | * in the current data block or a new one. Packets that were |
1177 | * just decoded are useless since no timestamp has been |
1178 | * associated with them. As such simply discard them. |
1179 | */ |
1180 | cs_etm__clear_all_packet_queues(etmq); |
1181 | } |
1182 | |
1183 | /* |
1184 | * We have a timestamp. Add it to the min heap to reflect when |
1185 | * instructions conveyed by the range packets of this traceID queue |
1186 | * started to execute. Once the same has been done for all the traceID |
1187 | * queues of each etmq, redenring and decoding can start in |
1188 | * chronological order. |
1189 | * |
1190 | * Note that packets decoded above are still in the traceID's packet |
1191 | * queue and will be processed in cs_etm__process_timestamped_queues(). |
1192 | */ |
1193 | cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); |
1194 | ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); |
1195 | out: |
1196 | return ret; |
1197 | } |
1198 | |
1199 | static inline |
1200 | void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, |
1201 | struct cs_etm_traceid_queue *tidq) |
1202 | { |
1203 | struct branch_stack *bs_src = tidq->last_branch_rb; |
1204 | struct branch_stack *bs_dst = tidq->last_branch; |
1205 | size_t nr = 0; |
1206 | |
1207 | /* |
1208 | * Set the number of records before early exit: ->nr is used to |
1209 | * determine how many branches to copy from ->entries. |
1210 | */ |
1211 | bs_dst->nr = bs_src->nr; |
1212 | |
1213 | /* |
1214 | * Early exit when there is nothing to copy. |
1215 | */ |
1216 | if (!bs_src->nr) |
1217 | return; |
1218 | |
1219 | /* |
1220 | * As bs_src->entries is a circular buffer, we need to copy from it in |
1221 | * two steps. First, copy the branches from the most recently inserted |
1222 | * branch ->last_branch_pos until the end of bs_src->entries buffer. |
1223 | */ |
1224 | nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; |
1225 | memcpy(&bs_dst->entries[0], |
1226 | &bs_src->entries[tidq->last_branch_pos], |
1227 | sizeof(struct branch_entry) * nr); |
1228 | |
1229 | /* |
1230 | * If we wrapped around at least once, the branches from the beginning |
1231 | * of the bs_src->entries buffer and until the ->last_branch_pos element |
1232 | * are older valid branches: copy them over. The total number of |
1233 | * branches copied over will be equal to the number of branches asked by |
1234 | * the user in last_branch_sz. |
1235 | */ |
1236 | if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { |
1237 | memcpy(&bs_dst->entries[nr], |
1238 | &bs_src->entries[0], |
1239 | sizeof(struct branch_entry) * tidq->last_branch_pos); |
1240 | } |
1241 | } |
1242 | |
1243 | static inline |
1244 | void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) |
1245 | { |
1246 | tidq->last_branch_pos = 0; |
1247 | tidq->last_branch_rb->nr = 0; |
1248 | } |
1249 | |
1250 | static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, |
1251 | u8 trace_chan_id, u64 addr) |
1252 | { |
1253 | u8 instrBytes[2]; |
1254 | |
1255 | cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes), |
1256 | instrBytes, 0); |
1257 | /* |
1258 | * T32 instruction size is indicated by bits[15:11] of the first |
1259 | * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 |
1260 | * denote a 32-bit instruction. |
1261 | */ |
1262 | return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2; |
1263 | } |
1264 | |
1265 | static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) |
1266 | { |
1267 | /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ |
1268 | if (packet->sample_type == CS_ETM_DISCONTINUITY) |
1269 | return 0; |
1270 | |
1271 | return packet->start_addr; |
1272 | } |
1273 | |
1274 | static inline |
1275 | u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) |
1276 | { |
1277 | /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ |
1278 | if (packet->sample_type == CS_ETM_DISCONTINUITY) |
1279 | return 0; |
1280 | |
1281 | return packet->end_addr - packet->last_instr_size; |
1282 | } |
1283 | |
1284 | static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, |
1285 | u64 trace_chan_id, |
1286 | const struct cs_etm_packet *packet, |
1287 | u64 offset) |
1288 | { |
1289 | if (packet->isa == CS_ETM_ISA_T32) { |
1290 | u64 addr = packet->start_addr; |
1291 | |
1292 | while (offset) { |
1293 | addr += cs_etm__t32_instr_size(etmq, |
1294 | trace_chan_id, addr); |
1295 | offset--; |
1296 | } |
1297 | return addr; |
1298 | } |
1299 | |
1300 | /* Assume a 4 byte instruction size (A32/A64) */ |
1301 | return packet->start_addr + offset * 4; |
1302 | } |
1303 | |
1304 | static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, |
1305 | struct cs_etm_traceid_queue *tidq) |
1306 | { |
1307 | struct branch_stack *bs = tidq->last_branch_rb; |
1308 | struct branch_entry *be; |
1309 | |
1310 | /* |
1311 | * The branches are recorded in a circular buffer in reverse |
1312 | * chronological order: we start recording from the last element of the |
1313 | * buffer down. After writing the first element of the stack, move the |
1314 | * insert position back to the end of the buffer. |
1315 | */ |
1316 | if (!tidq->last_branch_pos) |
1317 | tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; |
1318 | |
1319 | tidq->last_branch_pos -= 1; |
1320 | |
1321 | be = &bs->entries[tidq->last_branch_pos]; |
1322 | be->from = cs_etm__last_executed_instr(packet: tidq->prev_packet); |
1323 | be->to = cs_etm__first_executed_instr(packet: tidq->packet); |
1324 | /* No support for mispredict */ |
1325 | be->flags.mispred = 0; |
1326 | be->flags.predicted = 1; |
1327 | |
1328 | /* |
1329 | * Increment bs->nr until reaching the number of last branches asked by |
1330 | * the user on the command line. |
1331 | */ |
1332 | if (bs->nr < etmq->etm->synth_opts.last_branch_sz) |
1333 | bs->nr += 1; |
1334 | } |
1335 | |
1336 | static int cs_etm__inject_event(union perf_event *event, |
1337 | struct perf_sample *sample, u64 type) |
1338 | { |
1339 | event->header.size = perf_event__sample_event_size(sample, type, 0); |
1340 | return perf_event__synthesize_sample(event, type, 0, sample); |
1341 | } |
1342 | |
1343 | |
1344 | static int |
1345 | cs_etm__get_trace(struct cs_etm_queue *etmq) |
1346 | { |
1347 | struct auxtrace_buffer *aux_buffer = etmq->buffer; |
1348 | struct auxtrace_buffer *old_buffer = aux_buffer; |
1349 | struct auxtrace_queue *queue; |
1350 | |
1351 | queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; |
1352 | |
1353 | aux_buffer = auxtrace_buffer__next(queue, aux_buffer); |
1354 | |
1355 | /* If no more data, drop the previous auxtrace_buffer and return */ |
1356 | if (!aux_buffer) { |
1357 | if (old_buffer) |
1358 | auxtrace_buffer__drop_data(old_buffer); |
1359 | etmq->buf_len = 0; |
1360 | return 0; |
1361 | } |
1362 | |
1363 | etmq->buffer = aux_buffer; |
1364 | |
1365 | /* If the aux_buffer doesn't have data associated, try to load it */ |
1366 | if (!aux_buffer->data) { |
1367 | /* get the file desc associated with the perf data file */ |
1368 | int fd = perf_data__fd(data: etmq->etm->session->data); |
1369 | |
1370 | aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); |
1371 | if (!aux_buffer->data) |
1372 | return -ENOMEM; |
1373 | } |
1374 | |
1375 | /* If valid, drop the previous buffer */ |
1376 | if (old_buffer) |
1377 | auxtrace_buffer__drop_data(old_buffer); |
1378 | |
1379 | etmq->buf_used = 0; |
1380 | etmq->buf_len = aux_buffer->size; |
1381 | etmq->buf = aux_buffer->data; |
1382 | |
1383 | return etmq->buf_len; |
1384 | } |
1385 | |
1386 | static void cs_etm__set_thread(struct cs_etm_queue *etmq, |
1387 | struct cs_etm_traceid_queue *tidq, pid_t tid, |
1388 | ocsd_ex_level el) |
1389 | { |
1390 | struct machine *machine = cs_etm__get_machine(etmq, el); |
1391 | |
1392 | if (tid != -1) { |
1393 | thread__zput(tidq->thread); |
1394 | tidq->thread = machine__find_thread(machine, pid: -1, tid); |
1395 | } |
1396 | |
1397 | /* Couldn't find a known thread */ |
1398 | if (!tidq->thread) |
1399 | tidq->thread = machine__idle_thread(machine); |
1400 | |
1401 | tidq->el = el; |
1402 | } |
1403 | |
1404 | int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid, |
1405 | u8 trace_chan_id, ocsd_ex_level el) |
1406 | { |
1407 | struct cs_etm_traceid_queue *tidq; |
1408 | |
1409 | tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); |
1410 | if (!tidq) |
1411 | return -EINVAL; |
1412 | |
1413 | cs_etm__set_thread(etmq, tidq, tid, el); |
1414 | return 0; |
1415 | } |
1416 | |
1417 | bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) |
1418 | { |
1419 | return !!etmq->etm->timeless_decoding; |
1420 | } |
1421 | |
1422 | static void cs_etm__copy_insn(struct cs_etm_queue *etmq, |
1423 | u64 trace_chan_id, |
1424 | const struct cs_etm_packet *packet, |
1425 | struct perf_sample *sample) |
1426 | { |
1427 | /* |
1428 | * It's pointless to read instructions for the CS_ETM_DISCONTINUITY |
1429 | * packet, so directly bail out with 'insn_len' = 0. |
1430 | */ |
1431 | if (packet->sample_type == CS_ETM_DISCONTINUITY) { |
1432 | sample->insn_len = 0; |
1433 | return; |
1434 | } |
1435 | |
1436 | /* |
1437 | * T32 instruction size might be 32-bit or 16-bit, decide by calling |
1438 | * cs_etm__t32_instr_size(). |
1439 | */ |
1440 | if (packet->isa == CS_ETM_ISA_T32) |
1441 | sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id, |
1442 | addr: sample->ip); |
1443 | /* Otherwise, A64 and A32 instruction size are always 32-bit. */ |
1444 | else |
1445 | sample->insn_len = 4; |
1446 | |
1447 | cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len, |
1448 | (void *)sample->insn, 0); |
1449 | } |
1450 | |
1451 | u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) |
1452 | { |
1453 | struct cs_etm_auxtrace *etm = etmq->etm; |
1454 | |
1455 | if (etm->has_virtual_ts) |
1456 | return tsc_to_perf_time(cyc: cs_timestamp, tc: &etm->tc); |
1457 | else |
1458 | return cs_timestamp; |
1459 | } |
1460 | |
1461 | static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, |
1462 | struct cs_etm_traceid_queue *tidq) |
1463 | { |
1464 | struct cs_etm_auxtrace *etm = etmq->etm; |
1465 | struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue; |
1466 | |
1467 | if (!etm->timeless_decoding && etm->has_virtual_ts) |
1468 | return packet_queue->cs_timestamp; |
1469 | else |
1470 | return etm->latest_kernel_timestamp; |
1471 | } |
1472 | |
1473 | static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, |
1474 | struct cs_etm_traceid_queue *tidq, |
1475 | u64 addr, u64 period) |
1476 | { |
1477 | int ret = 0; |
1478 | struct cs_etm_auxtrace *etm = etmq->etm; |
1479 | union perf_event *event = tidq->event_buf; |
1480 | struct perf_sample sample = {.ip = 0,}; |
1481 | |
1482 | event->sample.header.type = PERF_RECORD_SAMPLE; |
1483 | event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el); |
1484 | event->sample.header.size = sizeof(struct perf_event_header); |
1485 | |
1486 | /* Set time field based on etm auxtrace config. */ |
1487 | sample.time = cs_etm__resolve_sample_time(etmq, tidq); |
1488 | |
1489 | sample.ip = addr; |
1490 | sample.pid = thread__pid(thread: tidq->thread); |
1491 | sample.tid = thread__tid(thread: tidq->thread); |
1492 | sample.id = etmq->etm->instructions_id; |
1493 | sample.stream_id = etmq->etm->instructions_id; |
1494 | sample.period = period; |
1495 | sample.cpu = tidq->packet->cpu; |
1496 | sample.flags = tidq->prev_packet->flags; |
1497 | sample.cpumode = event->sample.header.misc; |
1498 | |
1499 | cs_etm__copy_insn(etmq, trace_chan_id: tidq->trace_chan_id, packet: tidq->packet, sample: &sample); |
1500 | |
1501 | if (etm->synth_opts.last_branch) |
1502 | sample.branch_stack = tidq->last_branch; |
1503 | |
1504 | if (etm->synth_opts.inject) { |
1505 | ret = cs_etm__inject_event(event, sample: &sample, |
1506 | type: etm->instructions_sample_type); |
1507 | if (ret) |
1508 | return ret; |
1509 | } |
1510 | |
1511 | ret = perf_session__deliver_synth_event(session: etm->session, event, sample: &sample); |
1512 | |
1513 | if (ret) |
1514 | pr_err( |
1515 | "CS ETM Trace: failed to deliver instruction event, error %d\n" , |
1516 | ret); |
1517 | |
1518 | return ret; |
1519 | } |
1520 | |
1521 | /* |
1522 | * The cs etm packet encodes an instruction range between a branch target |
1523 | * and the next taken branch. Generate sample accordingly. |
1524 | */ |
1525 | static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, |
1526 | struct cs_etm_traceid_queue *tidq) |
1527 | { |
1528 | int ret = 0; |
1529 | struct cs_etm_auxtrace *etm = etmq->etm; |
1530 | struct perf_sample sample = {.ip = 0,}; |
1531 | union perf_event *event = tidq->event_buf; |
1532 | struct dummy_branch_stack { |
1533 | u64 nr; |
1534 | u64 hw_idx; |
1535 | struct branch_entry entries; |
1536 | } dummy_bs; |
1537 | u64 ip; |
1538 | |
1539 | ip = cs_etm__last_executed_instr(packet: tidq->prev_packet); |
1540 | |
1541 | event->sample.header.type = PERF_RECORD_SAMPLE; |
1542 | event->sample.header.misc = cs_etm__cpu_mode(etmq, ip, |
1543 | tidq->prev_packet_el); |
1544 | event->sample.header.size = sizeof(struct perf_event_header); |
1545 | |
1546 | /* Set time field based on etm auxtrace config. */ |
1547 | sample.time = cs_etm__resolve_sample_time(etmq, tidq); |
1548 | |
1549 | sample.ip = ip; |
1550 | sample.pid = thread__pid(thread: tidq->prev_packet_thread); |
1551 | sample.tid = thread__tid(thread: tidq->prev_packet_thread); |
1552 | sample.addr = cs_etm__first_executed_instr(packet: tidq->packet); |
1553 | sample.id = etmq->etm->branches_id; |
1554 | sample.stream_id = etmq->etm->branches_id; |
1555 | sample.period = 1; |
1556 | sample.cpu = tidq->packet->cpu; |
1557 | sample.flags = tidq->prev_packet->flags; |
1558 | sample.cpumode = event->sample.header.misc; |
1559 | |
1560 | cs_etm__copy_insn(etmq, trace_chan_id: tidq->trace_chan_id, packet: tidq->prev_packet, |
1561 | sample: &sample); |
1562 | |
1563 | /* |
1564 | * perf report cannot handle events without a branch stack |
1565 | */ |
1566 | if (etm->synth_opts.last_branch) { |
1567 | dummy_bs = (struct dummy_branch_stack){ |
1568 | .nr = 1, |
1569 | .hw_idx = -1ULL, |
1570 | .entries = { |
1571 | .from = sample.ip, |
1572 | .to = sample.addr, |
1573 | }, |
1574 | }; |
1575 | sample.branch_stack = (struct branch_stack *)&dummy_bs; |
1576 | } |
1577 | |
1578 | if (etm->synth_opts.inject) { |
1579 | ret = cs_etm__inject_event(event, sample: &sample, |
1580 | type: etm->branches_sample_type); |
1581 | if (ret) |
1582 | return ret; |
1583 | } |
1584 | |
1585 | ret = perf_session__deliver_synth_event(session: etm->session, event, sample: &sample); |
1586 | |
1587 | if (ret) |
1588 | pr_err( |
1589 | "CS ETM Trace: failed to deliver instruction event, error %d\n" , |
1590 | ret); |
1591 | |
1592 | return ret; |
1593 | } |
1594 | |
/*
 * Context handed to perf_event__synthesize_attr(): cs_etm__event_synth()
 * receives a pointer to @dummy_tool and uses container_of() to get back to
 * this structure and the session to deliver to.
 */
struct cs_etm_synth {
	struct perf_tool dummy_tool;	/* tool handle passed to the callback */
	struct perf_session *session;	/* session the event is delivered to */
};
1599 | |
1600 | static int cs_etm__event_synth(struct perf_tool *tool, |
1601 | union perf_event *event, |
1602 | struct perf_sample *sample __maybe_unused, |
1603 | struct machine *machine __maybe_unused) |
1604 | { |
1605 | struct cs_etm_synth *cs_etm_synth = |
1606 | container_of(tool, struct cs_etm_synth, dummy_tool); |
1607 | |
1608 | return perf_session__deliver_synth_event(session: cs_etm_synth->session, |
1609 | event, NULL); |
1610 | } |
1611 | |
1612 | static int cs_etm__synth_event(struct perf_session *session, |
1613 | struct perf_event_attr *attr, u64 id) |
1614 | { |
1615 | struct cs_etm_synth cs_etm_synth; |
1616 | |
1617 | memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); |
1618 | cs_etm_synth.session = session; |
1619 | |
1620 | return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, |
1621 | &id, cs_etm__event_synth); |
1622 | } |
1623 | |
1624 | static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, |
1625 | struct perf_session *session) |
1626 | { |
1627 | struct evlist *evlist = session->evlist; |
1628 | struct evsel *evsel; |
1629 | struct perf_event_attr attr; |
1630 | bool found = false; |
1631 | u64 id; |
1632 | int err; |
1633 | |
1634 | evlist__for_each_entry(evlist, evsel) { |
1635 | if (evsel->core.attr.type == etm->pmu_type) { |
1636 | found = true; |
1637 | break; |
1638 | } |
1639 | } |
1640 | |
1641 | if (!found) { |
1642 | pr_debug("No selected events with CoreSight Trace data\n" ); |
1643 | return 0; |
1644 | } |
1645 | |
1646 | memset(&attr, 0, sizeof(struct perf_event_attr)); |
1647 | attr.size = sizeof(struct perf_event_attr); |
1648 | attr.type = PERF_TYPE_HARDWARE; |
1649 | attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; |
1650 | attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | |
1651 | PERF_SAMPLE_PERIOD; |
1652 | if (etm->timeless_decoding) |
1653 | attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; |
1654 | else |
1655 | attr.sample_type |= PERF_SAMPLE_TIME; |
1656 | |
1657 | attr.exclude_user = evsel->core.attr.exclude_user; |
1658 | attr.exclude_kernel = evsel->core.attr.exclude_kernel; |
1659 | attr.exclude_hv = evsel->core.attr.exclude_hv; |
1660 | attr.exclude_host = evsel->core.attr.exclude_host; |
1661 | attr.exclude_guest = evsel->core.attr.exclude_guest; |
1662 | attr.sample_id_all = evsel->core.attr.sample_id_all; |
1663 | attr.read_format = evsel->core.attr.read_format; |
1664 | |
1665 | /* create new id val to be a fixed offset from evsel id */ |
1666 | id = evsel->core.id[0] + 1000000000; |
1667 | |
1668 | if (!id) |
1669 | id = 1; |
1670 | |
1671 | if (etm->synth_opts.branches) { |
1672 | attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; |
1673 | attr.sample_period = 1; |
1674 | attr.sample_type |= PERF_SAMPLE_ADDR; |
1675 | err = cs_etm__synth_event(session, attr: &attr, id); |
1676 | if (err) |
1677 | return err; |
1678 | etm->branches_sample_type = attr.sample_type; |
1679 | etm->branches_id = id; |
1680 | id += 1; |
1681 | attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; |
1682 | } |
1683 | |
1684 | if (etm->synth_opts.last_branch) { |
1685 | attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; |
1686 | /* |
1687 | * We don't use the hardware index, but the sample generation |
1688 | * code uses the new format branch_stack with this field, |
1689 | * so the event attributes must indicate that it's present. |
1690 | */ |
1691 | attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; |
1692 | } |
1693 | |
1694 | if (etm->synth_opts.instructions) { |
1695 | attr.config = PERF_COUNT_HW_INSTRUCTIONS; |
1696 | attr.sample_period = etm->synth_opts.period; |
1697 | etm->instructions_sample_period = attr.sample_period; |
1698 | err = cs_etm__synth_event(session, attr: &attr, id); |
1699 | if (err) |
1700 | return err; |
1701 | etm->instructions_sample_type = attr.sample_type; |
1702 | etm->instructions_id = id; |
1703 | id += 1; |
1704 | } |
1705 | |
1706 | return 0; |
1707 | } |
1708 | |
1709 | static int cs_etm__sample(struct cs_etm_queue *etmq, |
1710 | struct cs_etm_traceid_queue *tidq) |
1711 | { |
1712 | struct cs_etm_auxtrace *etm = etmq->etm; |
1713 | int ret; |
1714 | u8 trace_chan_id = tidq->trace_chan_id; |
1715 | u64 instrs_prev; |
1716 | |
1717 | /* Get instructions remainder from previous packet */ |
1718 | instrs_prev = tidq->period_instructions; |
1719 | |
1720 | tidq->period_instructions += tidq->packet->instr_count; |
1721 | |
1722 | /* |
1723 | * Record a branch when the last instruction in |
1724 | * PREV_PACKET is a branch. |
1725 | */ |
1726 | if (etm->synth_opts.last_branch && |
1727 | tidq->prev_packet->sample_type == CS_ETM_RANGE && |
1728 | tidq->prev_packet->last_instr_taken_branch) |
1729 | cs_etm__update_last_branch_rb(etmq, tidq); |
1730 | |
1731 | if (etm->synth_opts.instructions && |
1732 | tidq->period_instructions >= etm->instructions_sample_period) { |
1733 | /* |
1734 | * Emit instruction sample periodically |
1735 | * TODO: allow period to be defined in cycles and clock time |
1736 | */ |
1737 | |
1738 | /* |
1739 | * Below diagram demonstrates the instruction samples |
1740 | * generation flows: |
1741 | * |
1742 | * Instrs Instrs Instrs Instrs |
1743 | * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) |
1744 | * | | | | |
1745 | * V V V V |
1746 | * -------------------------------------------------- |
1747 | * ^ ^ |
1748 | * | | |
1749 | * Period Period |
1750 | * instructions(Pi) instructions(Pi') |
1751 | * |
1752 | * | | |
1753 | * \---------------- -----------------/ |
1754 | * V |
1755 | * tidq->packet->instr_count |
1756 | * |
1757 | * Instrs Sample(n...) are the synthesised samples occurring |
1758 | * every etm->instructions_sample_period instructions - as |
1759 | * defined on the perf command line. Sample(n) is being the |
1760 | * last sample before the current etm packet, n+1 to n+3 |
1761 | * samples are generated from the current etm packet. |
1762 | * |
1763 | * tidq->packet->instr_count represents the number of |
1764 | * instructions in the current etm packet. |
1765 | * |
1766 | * Period instructions (Pi) contains the number of |
1767 | * instructions executed after the sample point(n) from the |
1768 | * previous etm packet. This will always be less than |
1769 | * etm->instructions_sample_period. |
1770 | * |
1771 | * When generate new samples, it combines with two parts |
1772 | * instructions, one is the tail of the old packet and another |
1773 | * is the head of the new coming packet, to generate |
1774 | * sample(n+1); sample(n+2) and sample(n+3) consume the |
1775 | * instructions with sample period. After sample(n+3), the rest |
1776 | * instructions will be used by later packet and it is assigned |
1777 | * to tidq->period_instructions for next round calculation. |
1778 | */ |
1779 | |
1780 | /* |
1781 | * Get the initial offset into the current packet instructions; |
1782 | * entry conditions ensure that instrs_prev is less than |
1783 | * etm->instructions_sample_period. |
1784 | */ |
1785 | u64 offset = etm->instructions_sample_period - instrs_prev; |
1786 | u64 addr; |
1787 | |
1788 | /* Prepare last branches for instruction sample */ |
1789 | if (etm->synth_opts.last_branch) |
1790 | cs_etm__copy_last_branch_rb(etmq, tidq); |
1791 | |
1792 | while (tidq->period_instructions >= |
1793 | etm->instructions_sample_period) { |
1794 | /* |
1795 | * Calculate the address of the sampled instruction (-1 |
1796 | * as sample is reported as though instruction has just |
1797 | * been executed, but PC has not advanced to next |
1798 | * instruction) |
1799 | */ |
1800 | addr = cs_etm__instr_addr(etmq, trace_chan_id, |
1801 | packet: tidq->packet, offset: offset - 1); |
1802 | ret = cs_etm__synth_instruction_sample( |
1803 | etmq, tidq, addr, |
1804 | period: etm->instructions_sample_period); |
1805 | if (ret) |
1806 | return ret; |
1807 | |
1808 | offset += etm->instructions_sample_period; |
1809 | tidq->period_instructions -= |
1810 | etm->instructions_sample_period; |
1811 | } |
1812 | } |
1813 | |
1814 | if (etm->synth_opts.branches) { |
1815 | bool generate_sample = false; |
1816 | |
1817 | /* Generate sample for tracing on packet */ |
1818 | if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) |
1819 | generate_sample = true; |
1820 | |
1821 | /* Generate sample for branch taken packet */ |
1822 | if (tidq->prev_packet->sample_type == CS_ETM_RANGE && |
1823 | tidq->prev_packet->last_instr_taken_branch) |
1824 | generate_sample = true; |
1825 | |
1826 | if (generate_sample) { |
1827 | ret = cs_etm__synth_branch_sample(etmq, tidq); |
1828 | if (ret) |
1829 | return ret; |
1830 | } |
1831 | } |
1832 | |
1833 | cs_etm__packet_swap(etm, tidq); |
1834 | |
1835 | return 0; |
1836 | } |
1837 | |
1838 | static int cs_etm__exception(struct cs_etm_traceid_queue *tidq) |
1839 | { |
1840 | /* |
1841 | * When the exception packet is inserted, whether the last instruction |
1842 | * in previous range packet is taken branch or not, we need to force |
1843 | * to set 'prev_packet->last_instr_taken_branch' to true. This ensures |
1844 | * to generate branch sample for the instruction range before the |
1845 | * exception is trapped to kernel or before the exception returning. |
1846 | * |
1847 | * The exception packet includes the dummy address values, so don't |
1848 | * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful |
1849 | * for generating instruction and branch samples. |
1850 | */ |
1851 | if (tidq->prev_packet->sample_type == CS_ETM_RANGE) |
1852 | tidq->prev_packet->last_instr_taken_branch = true; |
1853 | |
1854 | return 0; |
1855 | } |
1856 | |
1857 | static int cs_etm__flush(struct cs_etm_queue *etmq, |
1858 | struct cs_etm_traceid_queue *tidq) |
1859 | { |
1860 | int err = 0; |
1861 | struct cs_etm_auxtrace *etm = etmq->etm; |
1862 | |
1863 | /* Handle start tracing packet */ |
1864 | if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) |
1865 | goto swap_packet; |
1866 | |
1867 | if (etmq->etm->synth_opts.last_branch && |
1868 | etmq->etm->synth_opts.instructions && |
1869 | tidq->prev_packet->sample_type == CS_ETM_RANGE) { |
1870 | u64 addr; |
1871 | |
1872 | /* Prepare last branches for instruction sample */ |
1873 | cs_etm__copy_last_branch_rb(etmq, tidq); |
1874 | |
1875 | /* |
1876 | * Generate a last branch event for the branches left in the |
1877 | * circular buffer at the end of the trace. |
1878 | * |
1879 | * Use the address of the end of the last reported execution |
1880 | * range |
1881 | */ |
1882 | addr = cs_etm__last_executed_instr(packet: tidq->prev_packet); |
1883 | |
1884 | err = cs_etm__synth_instruction_sample( |
1885 | etmq, tidq, addr, |
1886 | period: tidq->period_instructions); |
1887 | if (err) |
1888 | return err; |
1889 | |
1890 | tidq->period_instructions = 0; |
1891 | |
1892 | } |
1893 | |
1894 | if (etm->synth_opts.branches && |
1895 | tidq->prev_packet->sample_type == CS_ETM_RANGE) { |
1896 | err = cs_etm__synth_branch_sample(etmq, tidq); |
1897 | if (err) |
1898 | return err; |
1899 | } |
1900 | |
1901 | swap_packet: |
1902 | cs_etm__packet_swap(etm, tidq); |
1903 | |
1904 | /* Reset last branches after flush the trace */ |
1905 | if (etm->synth_opts.last_branch) |
1906 | cs_etm__reset_last_branch_rb(tidq); |
1907 | |
1908 | return err; |
1909 | } |
1910 | |
1911 | static int cs_etm__end_block(struct cs_etm_queue *etmq, |
1912 | struct cs_etm_traceid_queue *tidq) |
1913 | { |
1914 | int err; |
1915 | |
1916 | /* |
1917 | * It has no new packet coming and 'etmq->packet' contains the stale |
1918 | * packet which was set at the previous time with packets swapping; |
1919 | * so skip to generate branch sample to avoid stale packet. |
1920 | * |
1921 | * For this case only flush branch stack and generate a last branch |
1922 | * event for the branches left in the circular buffer at the end of |
1923 | * the trace. |
1924 | */ |
1925 | if (etmq->etm->synth_opts.last_branch && |
1926 | etmq->etm->synth_opts.instructions && |
1927 | tidq->prev_packet->sample_type == CS_ETM_RANGE) { |
1928 | u64 addr; |
1929 | |
1930 | /* Prepare last branches for instruction sample */ |
1931 | cs_etm__copy_last_branch_rb(etmq, tidq); |
1932 | |
1933 | /* |
1934 | * Use the address of the end of the last reported execution |
1935 | * range. |
1936 | */ |
1937 | addr = cs_etm__last_executed_instr(packet: tidq->prev_packet); |
1938 | |
1939 | err = cs_etm__synth_instruction_sample( |
1940 | etmq, tidq, addr, |
1941 | period: tidq->period_instructions); |
1942 | if (err) |
1943 | return err; |
1944 | |
1945 | tidq->period_instructions = 0; |
1946 | } |
1947 | |
1948 | return 0; |
1949 | } |
1950 | /* |
1951 | * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue |
1952 | * if need be. |
1953 | * Returns: < 0 if error |
1954 | * = 0 if no more auxtrace_buffer to read |
1955 | * > 0 if the current buffer isn't empty yet |
1956 | */ |
1957 | static int cs_etm__get_data_block(struct cs_etm_queue *etmq) |
1958 | { |
1959 | int ret; |
1960 | |
1961 | if (!etmq->buf_len) { |
1962 | ret = cs_etm__get_trace(etmq); |
1963 | if (ret <= 0) |
1964 | return ret; |
1965 | /* |
1966 | * We cannot assume consecutive blocks in the data file |
1967 | * are contiguous, reset the decoder to force re-sync. |
1968 | */ |
1969 | ret = cs_etm_decoder__reset(decoder: etmq->decoder); |
1970 | if (ret) |
1971 | return ret; |
1972 | } |
1973 | |
1974 | return etmq->buf_len; |
1975 | } |
1976 | |
1977 | static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, |
1978 | struct cs_etm_packet *packet, |
1979 | u64 end_addr) |
1980 | { |
1981 | /* Initialise to keep compiler happy */ |
1982 | u16 instr16 = 0; |
1983 | u32 instr32 = 0; |
1984 | u64 addr; |
1985 | |
1986 | switch (packet->isa) { |
1987 | case CS_ETM_ISA_T32: |
1988 | /* |
1989 | * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: |
1990 | * |
1991 | * b'15 b'8 |
1992 | * +-----------------+--------+ |
1993 | * | 1 1 0 1 1 1 1 1 | imm8 | |
1994 | * +-----------------+--------+ |
1995 | * |
1996 | * According to the specification, it only defines SVC for T32 |
1997 | * with 16 bits instruction and has no definition for 32bits; |
1998 | * so below only read 2 bytes as instruction size for T32. |
1999 | */ |
2000 | addr = end_addr - 2; |
2001 | cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16), |
2002 | (u8 *)&instr16, 0); |
2003 | if ((instr16 & 0xFF00) == 0xDF00) |
2004 | return true; |
2005 | |
2006 | break; |
2007 | case CS_ETM_ISA_A32: |
2008 | /* |
2009 | * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: |
2010 | * |
2011 | * b'31 b'28 b'27 b'24 |
2012 | * +---------+---------+-------------------------+ |
2013 | * | !1111 | 1 1 1 1 | imm24 | |
2014 | * +---------+---------+-------------------------+ |
2015 | */ |
2016 | addr = end_addr - 4; |
2017 | cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), |
2018 | (u8 *)&instr32, 0); |
2019 | if ((instr32 & 0x0F000000) == 0x0F000000 && |
2020 | (instr32 & 0xF0000000) != 0xF0000000) |
2021 | return true; |
2022 | |
2023 | break; |
2024 | case CS_ETM_ISA_A64: |
2025 | /* |
2026 | * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: |
2027 | * |
2028 | * b'31 b'21 b'4 b'0 |
2029 | * +-----------------------+---------+-----------+ |
2030 | * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | |
2031 | * +-----------------------+---------+-----------+ |
2032 | */ |
2033 | addr = end_addr - 4; |
2034 | cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), |
2035 | (u8 *)&instr32, 0); |
2036 | if ((instr32 & 0xFFE0001F) == 0xd4000001) |
2037 | return true; |
2038 | |
2039 | break; |
2040 | case CS_ETM_ISA_UNKNOWN: |
2041 | default: |
2042 | break; |
2043 | } |
2044 | |
2045 | return false; |
2046 | } |
2047 | |
2048 | static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, |
2049 | struct cs_etm_traceid_queue *tidq, u64 magic) |
2050 | { |
2051 | u8 trace_chan_id = tidq->trace_chan_id; |
2052 | struct cs_etm_packet *packet = tidq->packet; |
2053 | struct cs_etm_packet *prev_packet = tidq->prev_packet; |
2054 | |
2055 | if (magic == __perf_cs_etmv3_magic) |
2056 | if (packet->exception_number == CS_ETMV3_EXC_SVC) |
2057 | return true; |
2058 | |
2059 | /* |
2060 | * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and |
2061 | * HVC cases; need to check if it's SVC instruction based on |
2062 | * packet address. |
2063 | */ |
2064 | if (magic == __perf_cs_etmv4_magic) { |
2065 | if (packet->exception_number == CS_ETMV4_EXC_CALL && |
2066 | cs_etm__is_svc_instr(etmq, trace_chan_id, packet: prev_packet, |
2067 | end_addr: prev_packet->end_addr)) |
2068 | return true; |
2069 | } |
2070 | |
2071 | return false; |
2072 | } |
2073 | |
2074 | static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, |
2075 | u64 magic) |
2076 | { |
2077 | struct cs_etm_packet *packet = tidq->packet; |
2078 | |
2079 | if (magic == __perf_cs_etmv3_magic) |
2080 | if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || |
2081 | packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || |
2082 | packet->exception_number == CS_ETMV3_EXC_PE_RESET || |
2083 | packet->exception_number == CS_ETMV3_EXC_IRQ || |
2084 | packet->exception_number == CS_ETMV3_EXC_FIQ) |
2085 | return true; |
2086 | |
2087 | if (magic == __perf_cs_etmv4_magic) |
2088 | if (packet->exception_number == CS_ETMV4_EXC_RESET || |
2089 | packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || |
2090 | packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || |
2091 | packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || |
2092 | packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || |
2093 | packet->exception_number == CS_ETMV4_EXC_IRQ || |
2094 | packet->exception_number == CS_ETMV4_EXC_FIQ) |
2095 | return true; |
2096 | |
2097 | return false; |
2098 | } |
2099 | |
2100 | static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, |
2101 | struct cs_etm_traceid_queue *tidq, |
2102 | u64 magic) |
2103 | { |
2104 | u8 trace_chan_id = tidq->trace_chan_id; |
2105 | struct cs_etm_packet *packet = tidq->packet; |
2106 | struct cs_etm_packet *prev_packet = tidq->prev_packet; |
2107 | |
2108 | if (magic == __perf_cs_etmv3_magic) |
2109 | if (packet->exception_number == CS_ETMV3_EXC_SMC || |
2110 | packet->exception_number == CS_ETMV3_EXC_HYP || |
2111 | packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || |
2112 | packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || |
2113 | packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || |
2114 | packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || |
2115 | packet->exception_number == CS_ETMV3_EXC_GENERIC) |
2116 | return true; |
2117 | |
2118 | if (magic == __perf_cs_etmv4_magic) { |
2119 | if (packet->exception_number == CS_ETMV4_EXC_TRAP || |
2120 | packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || |
2121 | packet->exception_number == CS_ETMV4_EXC_INST_FAULT || |
2122 | packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) |
2123 | return true; |
2124 | |
2125 | /* |
2126 | * For CS_ETMV4_EXC_CALL, except SVC other instructions |
2127 | * (SMC, HVC) are taken as sync exceptions. |
2128 | */ |
2129 | if (packet->exception_number == CS_ETMV4_EXC_CALL && |
2130 | !cs_etm__is_svc_instr(etmq, trace_chan_id, packet: prev_packet, |
2131 | end_addr: prev_packet->end_addr)) |
2132 | return true; |
2133 | |
2134 | /* |
2135 | * ETMv4 has 5 bits for exception number; if the numbers |
2136 | * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] |
2137 | * they are implementation defined exceptions. |
2138 | * |
2139 | * For this case, simply take it as sync exception. |
2140 | */ |
2141 | if (packet->exception_number > CS_ETMV4_EXC_FIQ && |
2142 | packet->exception_number <= CS_ETMV4_EXC_END) |
2143 | return true; |
2144 | } |
2145 | |
2146 | return false; |
2147 | } |
2148 | |
2149 | static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, |
2150 | struct cs_etm_traceid_queue *tidq) |
2151 | { |
2152 | struct cs_etm_packet *packet = tidq->packet; |
2153 | struct cs_etm_packet *prev_packet = tidq->prev_packet; |
2154 | u8 trace_chan_id = tidq->trace_chan_id; |
2155 | u64 magic; |
2156 | int ret; |
2157 | |
2158 | switch (packet->sample_type) { |
2159 | case CS_ETM_RANGE: |
2160 | /* |
2161 | * Immediate branch instruction without neither link nor |
2162 | * return flag, it's normal branch instruction within |
2163 | * the function. |
2164 | */ |
2165 | if (packet->last_instr_type == OCSD_INSTR_BR && |
2166 | packet->last_instr_subtype == OCSD_S_INSTR_NONE) { |
2167 | packet->flags = PERF_IP_FLAG_BRANCH; |
2168 | |
2169 | if (packet->last_instr_cond) |
2170 | packet->flags |= PERF_IP_FLAG_CONDITIONAL; |
2171 | } |
2172 | |
2173 | /* |
2174 | * Immediate branch instruction with link (e.g. BL), this is |
2175 | * branch instruction for function call. |
2176 | */ |
2177 | if (packet->last_instr_type == OCSD_INSTR_BR && |
2178 | packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) |
2179 | packet->flags = PERF_IP_FLAG_BRANCH | |
2180 | PERF_IP_FLAG_CALL; |
2181 | |
2182 | /* |
2183 | * Indirect branch instruction with link (e.g. BLR), this is |
2184 | * branch instruction for function call. |
2185 | */ |
2186 | if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && |
2187 | packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) |
2188 | packet->flags = PERF_IP_FLAG_BRANCH | |
2189 | PERF_IP_FLAG_CALL; |
2190 | |
2191 | /* |
2192 | * Indirect branch instruction with subtype of |
2193 | * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for |
2194 | * function return for A32/T32. |
2195 | */ |
2196 | if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && |
2197 | packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) |
2198 | packet->flags = PERF_IP_FLAG_BRANCH | |
2199 | PERF_IP_FLAG_RETURN; |
2200 | |
2201 | /* |
2202 | * Indirect branch instruction without link (e.g. BR), usually |
2203 | * this is used for function return, especially for functions |
2204 | * within dynamic link lib. |
2205 | */ |
2206 | if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && |
2207 | packet->last_instr_subtype == OCSD_S_INSTR_NONE) |
2208 | packet->flags = PERF_IP_FLAG_BRANCH | |
2209 | PERF_IP_FLAG_RETURN; |
2210 | |
2211 | /* Return instruction for function return. */ |
2212 | if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && |
2213 | packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) |
2214 | packet->flags = PERF_IP_FLAG_BRANCH | |
2215 | PERF_IP_FLAG_RETURN; |
2216 | |
2217 | /* |
2218 | * Decoder might insert a discontinuity in the middle of |
2219 | * instruction packets, fixup prev_packet with flag |
2220 | * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. |
2221 | */ |
2222 | if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) |
2223 | prev_packet->flags |= PERF_IP_FLAG_BRANCH | |
2224 | PERF_IP_FLAG_TRACE_BEGIN; |
2225 | |
2226 | /* |
2227 | * If the previous packet is an exception return packet |
2228 | * and the return address just follows SVC instruction, |
2229 | * it needs to calibrate the previous packet sample flags |
2230 | * as PERF_IP_FLAG_SYSCALLRET. |
2231 | */ |
2232 | if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | |
2233 | PERF_IP_FLAG_RETURN | |
2234 | PERF_IP_FLAG_INTERRUPT) && |
2235 | cs_etm__is_svc_instr(etmq, trace_chan_id, |
2236 | packet, end_addr: packet->start_addr)) |
2237 | prev_packet->flags = PERF_IP_FLAG_BRANCH | |
2238 | PERF_IP_FLAG_RETURN | |
2239 | PERF_IP_FLAG_SYSCALLRET; |
2240 | break; |
2241 | case CS_ETM_DISCONTINUITY: |
2242 | /* |
2243 | * The trace is discontinuous, if the previous packet is |
2244 | * instruction packet, set flag PERF_IP_FLAG_TRACE_END |
2245 | * for previous packet. |
2246 | */ |
2247 | if (prev_packet->sample_type == CS_ETM_RANGE) |
2248 | prev_packet->flags |= PERF_IP_FLAG_BRANCH | |
2249 | PERF_IP_FLAG_TRACE_END; |
2250 | break; |
2251 | case CS_ETM_EXCEPTION: |
2252 | ret = cs_etm__get_magic(trace_chan_id: packet->trace_chan_id, magic: &magic); |
2253 | if (ret) |
2254 | return ret; |
2255 | |
2256 | /* The exception is for system call. */ |
2257 | if (cs_etm__is_syscall(etmq, tidq, magic)) |
2258 | packet->flags = PERF_IP_FLAG_BRANCH | |
2259 | PERF_IP_FLAG_CALL | |
2260 | PERF_IP_FLAG_SYSCALLRET; |
2261 | /* |
2262 | * The exceptions are triggered by external signals from bus, |
2263 | * interrupt controller, debug module, PE reset or halt. |
2264 | */ |
2265 | else if (cs_etm__is_async_exception(tidq, magic)) |
2266 | packet->flags = PERF_IP_FLAG_BRANCH | |
2267 | PERF_IP_FLAG_CALL | |
2268 | PERF_IP_FLAG_ASYNC | |
2269 | PERF_IP_FLAG_INTERRUPT; |
2270 | /* |
2271 | * Otherwise, exception is caused by trap, instruction & |
2272 | * data fault, or alignment errors. |
2273 | */ |
2274 | else if (cs_etm__is_sync_exception(etmq, tidq, magic)) |
2275 | packet->flags = PERF_IP_FLAG_BRANCH | |
2276 | PERF_IP_FLAG_CALL | |
2277 | PERF_IP_FLAG_INTERRUPT; |
2278 | |
2279 | /* |
2280 | * When the exception packet is inserted, since exception |
2281 | * packet is not used standalone for generating samples |
2282 | * and it's affiliation to the previous instruction range |
2283 | * packet; so set previous range packet flags to tell perf |
2284 | * it is an exception taken branch. |
2285 | */ |
2286 | if (prev_packet->sample_type == CS_ETM_RANGE) |
2287 | prev_packet->flags = packet->flags; |
2288 | break; |
2289 | case CS_ETM_EXCEPTION_RET: |
2290 | /* |
2291 | * When the exception return packet is inserted, since |
2292 | * exception return packet is not used standalone for |
2293 | * generating samples and it's affiliation to the previous |
2294 | * instruction range packet; so set previous range packet |
2295 | * flags to tell perf it is an exception return branch. |
2296 | * |
2297 | * The exception return can be for either system call or |
2298 | * other exception types; unfortunately the packet doesn't |
2299 | * contain exception type related info so we cannot decide |
2300 | * the exception type purely based on exception return packet. |
2301 | * If we record the exception number from exception packet and |
2302 | * reuse it for exception return packet, this is not reliable |
2303 | * due the trace can be discontinuity or the interrupt can |
2304 | * be nested, thus the recorded exception number cannot be |
2305 | * used for exception return packet for these two cases. |
2306 | * |
2307 | * For exception return packet, we only need to distinguish the |
2308 | * packet is for system call or for other types. Thus the |
2309 | * decision can be deferred when receive the next packet which |
2310 | * contains the return address, based on the return address we |
2311 | * can read out the previous instruction and check if it's a |
2312 | * system call instruction and then calibrate the sample flag |
2313 | * as needed. |
2314 | */ |
2315 | if (prev_packet->sample_type == CS_ETM_RANGE) |
2316 | prev_packet->flags = PERF_IP_FLAG_BRANCH | |
2317 | PERF_IP_FLAG_RETURN | |
2318 | PERF_IP_FLAG_INTERRUPT; |
2319 | break; |
2320 | case CS_ETM_EMPTY: |
2321 | default: |
2322 | break; |
2323 | } |
2324 | |
2325 | return 0; |
2326 | } |
2327 | |
2328 | static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) |
2329 | { |
2330 | int ret = 0; |
2331 | size_t processed = 0; |
2332 | |
2333 | /* |
2334 | * Packets are decoded and added to the decoder's packet queue |
2335 | * until the decoder packet processing callback has requested that |
2336 | * processing stops or there is nothing left in the buffer. Normal |
2337 | * operations that stop processing are a timestamp packet or a full |
2338 | * decoder buffer queue. |
2339 | */ |
2340 | ret = cs_etm_decoder__process_data_block(decoder: etmq->decoder, |
2341 | indx: etmq->offset, |
2342 | buf: &etmq->buf[etmq->buf_used], |
2343 | len: etmq->buf_len, |
2344 | consumed: &processed); |
2345 | if (ret) |
2346 | goto out; |
2347 | |
2348 | etmq->offset += processed; |
2349 | etmq->buf_used += processed; |
2350 | etmq->buf_len -= processed; |
2351 | |
2352 | out: |
2353 | return ret; |
2354 | } |
2355 | |
2356 | static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, |
2357 | struct cs_etm_traceid_queue *tidq) |
2358 | { |
2359 | int ret; |
2360 | struct cs_etm_packet_queue *packet_queue; |
2361 | |
2362 | packet_queue = &tidq->packet_queue; |
2363 | |
2364 | /* Process each packet in this chunk */ |
2365 | while (1) { |
2366 | ret = cs_etm_decoder__get_packet(packet_queue, |
2367 | packet: tidq->packet); |
2368 | if (ret <= 0) |
2369 | /* |
2370 | * Stop processing this chunk on |
2371 | * end of data or error |
2372 | */ |
2373 | break; |
2374 | |
2375 | /* |
2376 | * Since packet addresses are swapped in packet |
2377 | * handling within below switch() statements, |
2378 | * thus setting sample flags must be called |
2379 | * prior to switch() statement to use address |
2380 | * information before packets swapping. |
2381 | */ |
2382 | ret = cs_etm__set_sample_flags(etmq, tidq); |
2383 | if (ret < 0) |
2384 | break; |
2385 | |
2386 | switch (tidq->packet->sample_type) { |
2387 | case CS_ETM_RANGE: |
2388 | /* |
2389 | * If the packet contains an instruction |
2390 | * range, generate instruction sequence |
2391 | * events. |
2392 | */ |
2393 | cs_etm__sample(etmq, tidq); |
2394 | break; |
2395 | case CS_ETM_EXCEPTION: |
2396 | case CS_ETM_EXCEPTION_RET: |
2397 | /* |
2398 | * If the exception packet is coming, |
2399 | * make sure the previous instruction |
2400 | * range packet to be handled properly. |
2401 | */ |
2402 | cs_etm__exception(tidq); |
2403 | break; |
2404 | case CS_ETM_DISCONTINUITY: |
2405 | /* |
2406 | * Discontinuity in trace, flush |
2407 | * previous branch stack |
2408 | */ |
2409 | cs_etm__flush(etmq, tidq); |
2410 | break; |
2411 | case CS_ETM_EMPTY: |
2412 | /* |
2413 | * Should not receive empty packet, |
2414 | * report error. |
2415 | */ |
2416 | pr_err("CS ETM Trace: empty packet\n" ); |
2417 | return -EINVAL; |
2418 | default: |
2419 | break; |
2420 | } |
2421 | } |
2422 | |
2423 | return ret; |
2424 | } |
2425 | |
2426 | static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) |
2427 | { |
2428 | int idx; |
2429 | struct int_node *inode; |
2430 | struct cs_etm_traceid_queue *tidq; |
2431 | struct intlist *traceid_queues_list = etmq->traceid_queues_list; |
2432 | |
2433 | intlist__for_each_entry(inode, traceid_queues_list) { |
2434 | idx = (int)(intptr_t)inode->priv; |
2435 | tidq = etmq->traceid_queues[idx]; |
2436 | |
2437 | /* Ignore return value */ |
2438 | cs_etm__process_traceid_queue(etmq, tidq); |
2439 | |
2440 | /* |
2441 | * Generate an instruction sample with the remaining |
2442 | * branchstack entries. |
2443 | */ |
2444 | cs_etm__flush(etmq, tidq); |
2445 | } |
2446 | } |
2447 | |
2448 | static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq) |
2449 | { |
2450 | int err = 0; |
2451 | struct cs_etm_traceid_queue *tidq; |
2452 | |
2453 | tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); |
2454 | if (!tidq) |
2455 | return -EINVAL; |
2456 | |
2457 | /* Go through each buffer in the queue and decode them one by one */ |
2458 | while (1) { |
2459 | err = cs_etm__get_data_block(etmq); |
2460 | if (err <= 0) |
2461 | return err; |
2462 | |
2463 | /* Run trace decoder until buffer consumed or end of trace */ |
2464 | do { |
2465 | err = cs_etm__decode_data_block(etmq); |
2466 | if (err) |
2467 | return err; |
2468 | |
2469 | /* |
2470 | * Process each packet in this chunk, nothing to do if |
2471 | * an error occurs other than hoping the next one will |
2472 | * be better. |
2473 | */ |
2474 | err = cs_etm__process_traceid_queue(etmq, tidq); |
2475 | |
2476 | } while (etmq->buf_len); |
2477 | |
2478 | if (err == 0) |
2479 | /* Flush any remaining branch stack entries */ |
2480 | err = cs_etm__end_block(etmq, tidq); |
2481 | } |
2482 | |
2483 | return err; |
2484 | } |
2485 | |
2486 | static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq) |
2487 | { |
2488 | int idx, err = 0; |
2489 | struct cs_etm_traceid_queue *tidq; |
2490 | struct int_node *inode; |
2491 | |
2492 | /* Go through each buffer in the queue and decode them one by one */ |
2493 | while (1) { |
2494 | err = cs_etm__get_data_block(etmq); |
2495 | if (err <= 0) |
2496 | return err; |
2497 | |
2498 | /* Run trace decoder until buffer consumed or end of trace */ |
2499 | do { |
2500 | err = cs_etm__decode_data_block(etmq); |
2501 | if (err) |
2502 | return err; |
2503 | |
2504 | /* |
2505 | * cs_etm__run_per_thread_timeless_decoder() runs on a |
2506 | * single traceID queue because each TID has a separate |
2507 | * buffer. But here in per-cpu mode we need to iterate |
2508 | * over each channel instead. |
2509 | */ |
2510 | intlist__for_each_entry(inode, |
2511 | etmq->traceid_queues_list) { |
2512 | idx = (int)(intptr_t)inode->priv; |
2513 | tidq = etmq->traceid_queues[idx]; |
2514 | cs_etm__process_traceid_queue(etmq, tidq); |
2515 | } |
2516 | } while (etmq->buf_len); |
2517 | |
2518 | intlist__for_each_entry(inode, etmq->traceid_queues_list) { |
2519 | idx = (int)(intptr_t)inode->priv; |
2520 | tidq = etmq->traceid_queues[idx]; |
2521 | /* Flush any remaining branch stack entries */ |
2522 | err = cs_etm__end_block(etmq, tidq); |
2523 | if (err) |
2524 | return err; |
2525 | } |
2526 | } |
2527 | |
2528 | return err; |
2529 | } |
2530 | |
2531 | static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, |
2532 | pid_t tid) |
2533 | { |
2534 | unsigned int i; |
2535 | struct auxtrace_queues *queues = &etm->queues; |
2536 | |
2537 | for (i = 0; i < queues->nr_queues; i++) { |
2538 | struct auxtrace_queue *queue = &etm->queues.queue_array[i]; |
2539 | struct cs_etm_queue *etmq = queue->priv; |
2540 | struct cs_etm_traceid_queue *tidq; |
2541 | |
2542 | if (!etmq) |
2543 | continue; |
2544 | |
2545 | if (etm->per_thread_decoding) { |
2546 | tidq = cs_etm__etmq_get_traceid_queue( |
2547 | etmq, CS_ETM_PER_THREAD_TRACEID); |
2548 | |
2549 | if (!tidq) |
2550 | continue; |
2551 | |
2552 | if (tid == -1 || thread__tid(thread: tidq->thread) == tid) |
2553 | cs_etm__run_per_thread_timeless_decoder(etmq); |
2554 | } else |
2555 | cs_etm__run_per_cpu_timeless_decoder(etmq); |
2556 | } |
2557 | |
2558 | return 0; |
2559 | } |
2560 | |
2561 | static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) |
2562 | { |
2563 | int ret = 0; |
2564 | unsigned int cs_queue_nr, queue_nr, i; |
2565 | u8 trace_chan_id; |
2566 | u64 cs_timestamp; |
2567 | struct auxtrace_queue *queue; |
2568 | struct cs_etm_queue *etmq; |
2569 | struct cs_etm_traceid_queue *tidq; |
2570 | |
2571 | /* |
2572 | * Pre-populate the heap with one entry from each queue so that we can |
2573 | * start processing in time order across all queues. |
2574 | */ |
2575 | for (i = 0; i < etm->queues.nr_queues; i++) { |
2576 | etmq = etm->queues.queue_array[i].priv; |
2577 | if (!etmq) |
2578 | continue; |
2579 | |
2580 | ret = cs_etm__queue_first_cs_timestamp(etm, etmq, queue_nr: i); |
2581 | if (ret) |
2582 | return ret; |
2583 | } |
2584 | |
2585 | while (1) { |
2586 | if (!etm->heap.heap_cnt) |
2587 | goto out; |
2588 | |
2589 | /* Take the entry at the top of the min heap */ |
2590 | cs_queue_nr = etm->heap.heap_array[0].queue_nr; |
2591 | queue_nr = TO_QUEUE_NR(cs_queue_nr); |
2592 | trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr); |
2593 | queue = &etm->queues.queue_array[queue_nr]; |
2594 | etmq = queue->priv; |
2595 | |
2596 | /* |
2597 | * Remove the top entry from the heap since we are about |
2598 | * to process it. |
2599 | */ |
2600 | auxtrace_heap__pop(&etm->heap); |
2601 | |
2602 | tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); |
2603 | if (!tidq) { |
2604 | /* |
2605 | * No traceID queue has been allocated for this traceID, |
2606 | * which means something somewhere went very wrong. No |
2607 | * other choice than simply exit. |
2608 | */ |
2609 | ret = -EINVAL; |
2610 | goto out; |
2611 | } |
2612 | |
2613 | /* |
2614 | * Packets associated with this timestamp are already in |
2615 | * the etmq's traceID queue, so process them. |
2616 | */ |
2617 | ret = cs_etm__process_traceid_queue(etmq, tidq); |
2618 | if (ret < 0) |
2619 | goto out; |
2620 | |
2621 | /* |
2622 | * Packets for this timestamp have been processed, time to |
2623 | * move on to the next timestamp, fetching a new auxtrace_buffer |
2624 | * if need be. |
2625 | */ |
2626 | refetch: |
2627 | ret = cs_etm__get_data_block(etmq); |
2628 | if (ret < 0) |
2629 | goto out; |
2630 | |
2631 | /* |
2632 | * No more auxtrace_buffers to process in this etmq, simply |
2633 | * move on to another entry in the auxtrace_heap. |
2634 | */ |
2635 | if (!ret) |
2636 | continue; |
2637 | |
2638 | ret = cs_etm__decode_data_block(etmq); |
2639 | if (ret) |
2640 | goto out; |
2641 | |
2642 | cs_timestamp = cs_etm__etmq_get_timestamp(etmq, trace_chan_id: &trace_chan_id); |
2643 | |
2644 | if (!cs_timestamp) { |
2645 | /* |
2646 | * Function cs_etm__decode_data_block() returns when |
2647 | * there is no more traces to decode in the current |
2648 | * auxtrace_buffer OR when a timestamp has been |
2649 | * encountered on any of the traceID queues. Since we |
2650 | * did not get a timestamp, there is no more traces to |
2651 | * process in this auxtrace_buffer. As such empty and |
2652 | * flush all traceID queues. |
2653 | */ |
2654 | cs_etm__clear_all_traceid_queues(etmq); |
2655 | |
2656 | /* Fetch another auxtrace_buffer for this etmq */ |
2657 | goto refetch; |
2658 | } |
2659 | |
2660 | /* |
2661 | * Add to the min heap the timestamp for packets that have |
2662 | * just been decoded. They will be processed and synthesized |
2663 | * during the next call to cs_etm__process_traceid_queue() for |
2664 | * this queue/traceID. |
2665 | */ |
2666 | cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); |
2667 | ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); |
2668 | } |
2669 | |
2670 | out: |
2671 | return ret; |
2672 | } |
2673 | |
2674 | static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, |
2675 | union perf_event *event) |
2676 | { |
2677 | struct thread *th; |
2678 | |
2679 | if (etm->timeless_decoding) |
2680 | return 0; |
2681 | |
2682 | /* |
2683 | * Add the tid/pid to the log so that we can get a match when we get a |
2684 | * contextID from the decoder. Only track for the host: only kernel |
2685 | * trace is supported for guests which wouldn't need pids so this should |
2686 | * be fine. |
2687 | */ |
2688 | th = machine__findnew_thread(machine: &etm->session->machines.host, |
2689 | pid: event->itrace_start.pid, |
2690 | tid: event->itrace_start.tid); |
2691 | if (!th) |
2692 | return -ENOMEM; |
2693 | |
2694 | thread__put(thread: th); |
2695 | |
2696 | return 0; |
2697 | } |
2698 | |
2699 | static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, |
2700 | union perf_event *event) |
2701 | { |
2702 | struct thread *th; |
2703 | bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; |
2704 | |
2705 | /* |
2706 | * Context switch in per-thread mode are irrelevant since perf |
2707 | * will start/stop tracing as the process is scheduled. |
2708 | */ |
2709 | if (etm->timeless_decoding) |
2710 | return 0; |
2711 | |
2712 | /* |
2713 | * SWITCH_IN events carry the next process to be switched out while |
2714 | * SWITCH_OUT events carry the process to be switched in. As such |
2715 | * we don't care about IN events. |
2716 | */ |
2717 | if (!out) |
2718 | return 0; |
2719 | |
2720 | /* |
2721 | * Add the tid/pid to the log so that we can get a match when we get a |
2722 | * contextID from the decoder. Only track for the host: only kernel |
2723 | * trace is supported for guests which wouldn't need pids so this should |
2724 | * be fine. |
2725 | */ |
2726 | th = machine__findnew_thread(machine: &etm->session->machines.host, |
2727 | pid: event->context_switch.next_prev_pid, |
2728 | tid: event->context_switch.next_prev_tid); |
2729 | if (!th) |
2730 | return -ENOMEM; |
2731 | |
2732 | thread__put(thread: th); |
2733 | |
2734 | return 0; |
2735 | } |
2736 | |
2737 | static int cs_etm__process_event(struct perf_session *session, |
2738 | union perf_event *event, |
2739 | struct perf_sample *sample, |
2740 | struct perf_tool *tool) |
2741 | { |
2742 | struct cs_etm_auxtrace *etm = container_of(session->auxtrace, |
2743 | struct cs_etm_auxtrace, |
2744 | auxtrace); |
2745 | |
2746 | if (dump_trace) |
2747 | return 0; |
2748 | |
2749 | if (!tool->ordered_events) { |
2750 | pr_err("CoreSight ETM Trace requires ordered events\n" ); |
2751 | return -EINVAL; |
2752 | } |
2753 | |
2754 | switch (event->header.type) { |
2755 | case PERF_RECORD_EXIT: |
2756 | /* |
2757 | * Don't need to wait for cs_etm__flush_events() in per-thread mode to |
2758 | * start the decode because we know there will be no more trace from |
2759 | * this thread. All this does is emit samples earlier than waiting for |
2760 | * the flush in other modes, but with timestamps it makes sense to wait |
2761 | * for flush so that events from different threads are interleaved |
2762 | * properly. |
2763 | */ |
2764 | if (etm->per_thread_decoding && etm->timeless_decoding) |
2765 | return cs_etm__process_timeless_queues(etm, |
2766 | tid: event->fork.tid); |
2767 | break; |
2768 | |
2769 | case PERF_RECORD_ITRACE_START: |
2770 | return cs_etm__process_itrace_start(etm, event); |
2771 | |
2772 | case PERF_RECORD_SWITCH_CPU_WIDE: |
2773 | return cs_etm__process_switch_cpu_wide(etm, event); |
2774 | |
2775 | case PERF_RECORD_AUX: |
2776 | /* |
2777 | * Record the latest kernel timestamp available in the header |
2778 | * for samples so that synthesised samples occur from this point |
2779 | * onwards. |
2780 | */ |
2781 | if (sample->time && (sample->time != (u64)-1)) |
2782 | etm->latest_kernel_timestamp = sample->time; |
2783 | break; |
2784 | |
2785 | default: |
2786 | break; |
2787 | } |
2788 | |
2789 | return 0; |
2790 | } |
2791 | |
2792 | static void dump_queued_data(struct cs_etm_auxtrace *etm, |
2793 | struct perf_record_auxtrace *event) |
2794 | { |
2795 | struct auxtrace_buffer *buf; |
2796 | unsigned int i; |
2797 | /* |
2798 | * Find all buffers with same reference in the queues and dump them. |
2799 | * This is because the queues can contain multiple entries of the same |
2800 | * buffer that were split on aux records. |
2801 | */ |
2802 | for (i = 0; i < etm->queues.nr_queues; ++i) |
2803 | list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) |
2804 | if (buf->reference == event->reference) |
2805 | cs_etm__dump_event(etmq: etm->queues.queue_array[i].priv, buffer: buf); |
2806 | } |
2807 | |
2808 | static int cs_etm__process_auxtrace_event(struct perf_session *session, |
2809 | union perf_event *event, |
2810 | struct perf_tool *tool __maybe_unused) |
2811 | { |
2812 | struct cs_etm_auxtrace *etm = container_of(session->auxtrace, |
2813 | struct cs_etm_auxtrace, |
2814 | auxtrace); |
2815 | if (!etm->data_queued) { |
2816 | struct auxtrace_buffer *buffer; |
2817 | off_t data_offset; |
2818 | int fd = perf_data__fd(data: session->data); |
2819 | bool is_pipe = perf_data__is_pipe(data: session->data); |
2820 | int err; |
2821 | int idx = event->auxtrace.idx; |
2822 | |
2823 | if (is_pipe) |
2824 | data_offset = 0; |
2825 | else { |
2826 | data_offset = lseek(fd, 0, SEEK_CUR); |
2827 | if (data_offset == -1) |
2828 | return -errno; |
2829 | } |
2830 | |
2831 | err = auxtrace_queues__add_event(&etm->queues, session, |
2832 | event, data_offset, &buffer); |
2833 | if (err) |
2834 | return err; |
2835 | |
2836 | /* |
2837 | * Knowing if the trace is formatted or not requires a lookup of |
2838 | * the aux record so only works in non-piped mode where data is |
2839 | * queued in cs_etm__queue_aux_records(). Always assume |
2840 | * formatted in piped mode (true). |
2841 | */ |
2842 | err = cs_etm__setup_queue(etm, queue: &etm->queues.queue_array[idx], |
2843 | queue_nr: idx, formatted: true, sample_cpu: -1); |
2844 | if (err) |
2845 | return err; |
2846 | |
2847 | if (dump_trace) |
2848 | if (auxtrace_buffer__get_data(buffer, fd)) { |
2849 | cs_etm__dump_event(etmq: etm->queues.queue_array[idx].priv, buffer); |
2850 | auxtrace_buffer__put_data(buffer); |
2851 | } |
2852 | } else if (dump_trace) |
2853 | dump_queued_data(etm, event: &event->auxtrace); |
2854 | |
2855 | return 0; |
2856 | } |
2857 | |
2858 | static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) |
2859 | { |
2860 | struct evsel *evsel; |
2861 | struct evlist *evlist = etm->session->evlist; |
2862 | |
2863 | /* Override timeless mode with user input from --itrace=Z */ |
2864 | if (etm->synth_opts.timeless_decoding) { |
2865 | etm->timeless_decoding = true; |
2866 | return 0; |
2867 | } |
2868 | |
2869 | /* |
2870 | * Find the cs_etm evsel and look at what its timestamp setting was |
2871 | */ |
2872 | evlist__for_each_entry(evlist, evsel) |
2873 | if (cs_etm__evsel_is_auxtrace(session: etm->session, evsel)) { |
2874 | etm->timeless_decoding = |
2875 | !(evsel->core.attr.config & BIT(ETM_OPT_TS)); |
2876 | return 0; |
2877 | } |
2878 | |
2879 | pr_err("CS ETM: Couldn't find ETM evsel\n" ); |
2880 | return -EINVAL; |
2881 | } |
2882 | |
2883 | /* |
2884 | * Read a single cpu parameter block from the auxtrace_info priv block. |
2885 | * |
2886 | * For version 1 there is a per cpu nr_params entry. If we are handling |
2887 | * version 1 file, then there may be less, the same, or more params |
2888 | * indicated by this value than the compile time number we understand. |
2889 | * |
2890 | * For a version 0 info block, there are a fixed number, and we need to |
2891 | * fill out the nr_param value in the metadata we create. |
2892 | */ |
2893 | static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, |
2894 | int out_blk_size, int nr_params_v0) |
2895 | { |
2896 | u64 *metadata = NULL; |
2897 | int hdr_version; |
2898 | int nr_in_params, nr_out_params, nr_cmn_params; |
2899 | int i, k; |
2900 | |
2901 | metadata = zalloc(sizeof(*metadata) * out_blk_size); |
2902 | if (!metadata) |
2903 | return NULL; |
2904 | |
2905 | /* read block current index & version */ |
2906 | i = *buff_in_offset; |
2907 | hdr_version = buff_in[CS_HEADER_VERSION]; |
2908 | |
2909 | if (!hdr_version) { |
2910 | /* read version 0 info block into a version 1 metadata block */ |
2911 | nr_in_params = nr_params_v0; |
2912 | metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; |
2913 | metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; |
2914 | metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; |
2915 | /* remaining block params at offset +1 from source */ |
2916 | for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) |
2917 | metadata[k + 1] = buff_in[i + k]; |
2918 | /* version 0 has 2 common params */ |
2919 | nr_cmn_params = 2; |
2920 | } else { |
2921 | /* read version 1 info block - input and output nr_params may differ */ |
2922 | /* version 1 has 3 common params */ |
2923 | nr_cmn_params = 3; |
2924 | nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; |
2925 | |
2926 | /* if input has more params than output - skip excess */ |
2927 | nr_out_params = nr_in_params + nr_cmn_params; |
2928 | if (nr_out_params > out_blk_size) |
2929 | nr_out_params = out_blk_size; |
2930 | |
2931 | for (k = CS_ETM_MAGIC; k < nr_out_params; k++) |
2932 | metadata[k] = buff_in[i + k]; |
2933 | |
2934 | /* record the actual nr params we copied */ |
2935 | metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; |
2936 | } |
2937 | |
2938 | /* adjust in offset by number of in params used */ |
2939 | i += nr_in_params + nr_cmn_params; |
2940 | *buff_in_offset = i; |
2941 | return metadata; |
2942 | } |
2943 | |
2944 | /** |
2945 | * Puts a fragment of an auxtrace buffer into the auxtrace queues based |
2946 | * on the bounds of aux_event, if it matches with the buffer that's at |
2947 | * file_offset. |
2948 | * |
2949 | * Normally, whole auxtrace buffers would be added to the queue. But we |
2950 | * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder |
2951 | * is reset across each buffer, so splitting the buffers up in advance has |
2952 | * the same effect. |
2953 | */ |
2954 | static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, |
2955 | struct perf_record_aux *aux_event, struct perf_sample *sample) |
2956 | { |
2957 | int err; |
2958 | char buf[PERF_SAMPLE_MAX_SIZE]; |
2959 | union perf_event *auxtrace_event_union; |
2960 | struct perf_record_auxtrace *auxtrace_event; |
2961 | union perf_event auxtrace_fragment; |
2962 | __u64 aux_offset, aux_size; |
2963 | __u32 idx; |
2964 | bool formatted; |
2965 | |
2966 | struct cs_etm_auxtrace *etm = container_of(session->auxtrace, |
2967 | struct cs_etm_auxtrace, |
2968 | auxtrace); |
2969 | |
2970 | /* |
2971 | * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got |
2972 | * from looping through the auxtrace index. |
2973 | */ |
2974 | err = perf_session__peek_event(session, file_offset, buf, |
2975 | PERF_SAMPLE_MAX_SIZE, event_ptr: &auxtrace_event_union, NULL); |
2976 | if (err) |
2977 | return err; |
2978 | auxtrace_event = &auxtrace_event_union->auxtrace; |
2979 | if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) |
2980 | return -EINVAL; |
2981 | |
2982 | if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || |
2983 | auxtrace_event->header.size != sz) { |
2984 | return -EINVAL; |
2985 | } |
2986 | |
2987 | /* |
2988 | * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See |
2989 | * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a |
2990 | * CPU as we set this always for the AUX_OUTPUT_HW_ID event. |
2991 | * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1. |
2992 | * Return 'not found' if mismatch. |
2993 | */ |
2994 | if (auxtrace_event->cpu == (__u32) -1) { |
2995 | etm->per_thread_decoding = true; |
2996 | if (auxtrace_event->tid != sample->tid) |
2997 | return 1; |
2998 | } else if (auxtrace_event->cpu != sample->cpu) { |
2999 | if (etm->per_thread_decoding) { |
3000 | /* |
3001 | * Found a per-cpu buffer after a per-thread one was |
3002 | * already found |
3003 | */ |
3004 | pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n" ); |
3005 | return -EINVAL; |
3006 | } |
3007 | return 1; |
3008 | } |
3009 | |
3010 | if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { |
3011 | /* |
3012 | * Clamp size in snapshot mode. The buffer size is clamped in |
3013 | * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect |
3014 | * the buffer size. |
3015 | */ |
3016 | aux_size = min(aux_event->aux_size, auxtrace_event->size); |
3017 | |
3018 | /* |
3019 | * In this mode, the head also points to the end of the buffer so aux_offset |
3020 | * needs to have the size subtracted so it points to the beginning as in normal mode |
3021 | */ |
3022 | aux_offset = aux_event->aux_offset - aux_size; |
3023 | } else { |
3024 | aux_size = aux_event->aux_size; |
3025 | aux_offset = aux_event->aux_offset; |
3026 | } |
3027 | |
3028 | if (aux_offset >= auxtrace_event->offset && |
3029 | aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { |
3030 | /* |
3031 | * If this AUX event was inside this buffer somewhere, create a new auxtrace event |
3032 | * based on the sizes of the aux event, and queue that fragment. |
3033 | */ |
3034 | auxtrace_fragment.auxtrace = *auxtrace_event; |
3035 | auxtrace_fragment.auxtrace.size = aux_size; |
3036 | auxtrace_fragment.auxtrace.offset = aux_offset; |
3037 | file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; |
3038 | |
3039 | pr_debug3("CS ETM: Queue buffer size: %#" PRI_lx64" offset: %#" PRI_lx64 |
3040 | " tid: %d cpu: %d\n" , aux_size, aux_offset, sample->tid, sample->cpu); |
3041 | err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, |
3042 | file_offset, NULL); |
3043 | if (err) |
3044 | return err; |
3045 | |
3046 | idx = auxtrace_event->idx; |
3047 | formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); |
3048 | return cs_etm__setup_queue(etm, queue: &etm->queues.queue_array[idx], |
3049 | queue_nr: idx, formatted, sample_cpu: sample->cpu); |
3050 | } |
3051 | |
3052 | /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ |
3053 | return 1; |
3054 | } |
3055 | |
3056 | static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event, |
3057 | u64 offset __maybe_unused, void *data __maybe_unused) |
3058 | { |
3059 | /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */ |
3060 | if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) { |
3061 | (*(int *)data)++; /* increment found count */ |
3062 | return cs_etm__process_aux_output_hw_id(session, event); |
3063 | } |
3064 | return 0; |
3065 | } |
3066 | |
3067 | static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, |
3068 | u64 offset __maybe_unused, void *data __maybe_unused) |
3069 | { |
3070 | struct perf_sample sample; |
3071 | int ret; |
3072 | struct auxtrace_index_entry *ent; |
3073 | struct auxtrace_index *auxtrace_index; |
3074 | struct evsel *evsel; |
3075 | size_t i; |
3076 | |
3077 | /* Don't care about any other events, we're only queuing buffers for AUX events */ |
3078 | if (event->header.type != PERF_RECORD_AUX) |
3079 | return 0; |
3080 | |
3081 | if (event->header.size < sizeof(struct perf_record_aux)) |
3082 | return -EINVAL; |
3083 | |
3084 | /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ |
3085 | if (!event->aux.aux_size) |
3086 | return 0; |
3087 | |
3088 | /* |
3089 | * Parse the sample, we need the sample_id_all data that comes after the event so that the |
3090 | * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. |
3091 | */ |
3092 | evsel = evlist__event2evsel(evlist: session->evlist, event); |
3093 | if (!evsel) |
3094 | return -EINVAL; |
3095 | ret = evsel__parse_sample(evsel, event, sample: &sample); |
3096 | if (ret) |
3097 | return ret; |
3098 | |
3099 | /* |
3100 | * Loop through the auxtrace index to find the buffer that matches up with this aux event. |
3101 | */ |
3102 | list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { |
3103 | for (i = 0; i < auxtrace_index->nr; i++) { |
3104 | ent = &auxtrace_index->entries[i]; |
3105 | ret = cs_etm__queue_aux_fragment(session, file_offset: ent->file_offset, |
3106 | sz: ent->sz, aux_event: &event->aux, sample: &sample); |
3107 | /* |
3108 | * Stop search on error or successful values. Continue search on |
3109 | * 1 ('not found') |
3110 | */ |
3111 | if (ret != 1) |
3112 | return ret; |
3113 | } |
3114 | } |
3115 | |
3116 | /* |
3117 | * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but |
3118 | * don't exit with an error because it will still be possible to decode other aux records. |
3119 | */ |
3120 | pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#" PRI_lx64 |
3121 | " tid: %d cpu: %d\n" , event->aux.aux_offset, sample.tid, sample.cpu); |
3122 | return 0; |
3123 | } |
3124 | |
3125 | static int cs_etm__queue_aux_records(struct perf_session *session) |
3126 | { |
3127 | struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, |
3128 | struct auxtrace_index, list); |
3129 | if (index && index->nr > 0) |
3130 | return perf_session__peek_events(session, offset: session->header.data_offset, |
3131 | size: session->header.data_size, |
3132 | cb: cs_etm__queue_aux_records_cb, NULL); |
3133 | |
3134 | /* |
3135 | * We would get here if there are no entries in the index (either no auxtrace |
3136 | * buffers or no index at all). Fail silently as there is the possibility of |
3137 | * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still |
3138 | * false. |
3139 | * |
3140 | * In that scenario, buffers will not be split by AUX records. |
3141 | */ |
3142 | return 0; |
3143 | } |
3144 | |
/*
 * NOTE: despite its name this evaluates to true when cpu @j's metadata block
 * does NOT carry the given parameter, i.e. when the block's NR_TRC_PARAMS
 * count is too small to reach it. Expects a 'metadata' array in scope.
 */
#define HAS_PARAM(j, type, param) \
	((CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1) >= \
	 metadata[(j)][CS_ETM_NR_TRC_PARAMS])
3147 | |
3148 | /* |
3149 | * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual |
3150 | * timestamps). |
3151 | */ |
3152 | static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) |
3153 | { |
3154 | int j; |
3155 | |
3156 | for (j = 0; j < num_cpu; j++) { |
3157 | switch (metadata[j][CS_ETM_MAGIC]) { |
3158 | case __perf_cs_etmv4_magic: |
3159 | if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1) |
3160 | return false; |
3161 | break; |
3162 | case __perf_cs_ete_magic: |
3163 | if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1) |
3164 | return false; |
3165 | break; |
3166 | default: |
3167 | /* Unknown / unsupported magic number. */ |
3168 | return false; |
3169 | } |
3170 | } |
3171 | return true; |
3172 | } |
3173 | |
3174 | /* map trace ids to correct metadata block, from information in metadata */ |
3175 | static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata) |
3176 | { |
3177 | u64 cs_etm_magic; |
3178 | u8 trace_chan_id; |
3179 | int i, err; |
3180 | |
3181 | for (i = 0; i < num_cpu; i++) { |
3182 | cs_etm_magic = metadata[i][CS_ETM_MAGIC]; |
3183 | switch (cs_etm_magic) { |
3184 | case __perf_cs_etmv3_magic: |
3185 | metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; |
3186 | trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]); |
3187 | break; |
3188 | case __perf_cs_etmv4_magic: |
3189 | case __perf_cs_ete_magic: |
3190 | metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; |
3191 | trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]); |
3192 | break; |
3193 | default: |
3194 | /* unknown magic number */ |
3195 | return -EINVAL; |
3196 | } |
3197 | err = cs_etm__map_trace_id(trace_chan_id, cpu_metadata: metadata[i]); |
3198 | if (err) |
3199 | return err; |
3200 | } |
3201 | return 0; |
3202 | } |
3203 | |
3204 | /* |
3205 | * If we found AUX_HW_ID packets, then set any metadata marked as unused to the |
3206 | * unused value to reduce the number of unneeded decoders created. |
3207 | */ |
3208 | static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata) |
3209 | { |
3210 | u64 cs_etm_magic; |
3211 | int i; |
3212 | |
3213 | for (i = 0; i < num_cpu; i++) { |
3214 | cs_etm_magic = metadata[i][CS_ETM_MAGIC]; |
3215 | switch (cs_etm_magic) { |
3216 | case __perf_cs_etmv3_magic: |
3217 | if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) |
3218 | metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; |
3219 | break; |
3220 | case __perf_cs_etmv4_magic: |
3221 | case __perf_cs_ete_magic: |
3222 | if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) |
3223 | metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; |
3224 | break; |
3225 | default: |
3226 | /* unknown magic number */ |
3227 | return -EINVAL; |
3228 | } |
3229 | } |
3230 | return 0; |
3231 | } |
3232 | |
3233 | int cs_etm__process_auxtrace_info_full(union perf_event *event, |
3234 | struct perf_session *session) |
3235 | { |
3236 | struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; |
3237 | struct cs_etm_auxtrace *etm = NULL; |
3238 | struct perf_record_time_conv *tc = &session->time_conv; |
3239 | int = sizeof(struct perf_event_header); |
3240 | int total_size = auxtrace_info->header.size; |
3241 | int priv_size = 0; |
3242 | int num_cpu; |
3243 | int err = 0; |
3244 | int aux_hw_id_found; |
3245 | int i, j; |
3246 | u64 *ptr = NULL; |
3247 | u64 **metadata = NULL; |
3248 | |
3249 | /* |
3250 | * Create an RB tree for traceID-metadata tuple. Since the conversion |
3251 | * has to be made for each packet that gets decoded, optimizing access |
3252 | * in anything other than a sequential array is worth doing. |
3253 | */ |
3254 | traceid_list = intlist__new(NULL); |
3255 | if (!traceid_list) |
3256 | return -ENOMEM; |
3257 | |
3258 | /* First the global part */ |
3259 | ptr = (u64 *) auxtrace_info->priv; |
3260 | num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff; |
3261 | metadata = zalloc(sizeof(*metadata) * num_cpu); |
3262 | if (!metadata) { |
3263 | err = -ENOMEM; |
3264 | goto err_free_traceid_list; |
3265 | } |
3266 | |
3267 | /* Start parsing after the common part of the header */ |
3268 | i = CS_HEADER_VERSION_MAX; |
3269 | |
3270 | /* |
3271 | * The metadata is stored in the auxtrace_info section and encodes |
3272 | * the configuration of the ARM embedded trace macrocell which is |
3273 | * required by the trace decoder to properly decode the trace due |
3274 | * to its highly compressed nature. |
3275 | */ |
3276 | for (j = 0; j < num_cpu; j++) { |
3277 | if (ptr[i] == __perf_cs_etmv3_magic) { |
3278 | metadata[j] = |
3279 | cs_etm__create_meta_blk(buff_in: ptr, buff_in_offset: &i, |
3280 | out_blk_size: CS_ETM_PRIV_MAX, |
3281 | CS_ETM_NR_TRC_PARAMS_V0); |
3282 | } else if (ptr[i] == __perf_cs_etmv4_magic) { |
3283 | metadata[j] = |
3284 | cs_etm__create_meta_blk(buff_in: ptr, buff_in_offset: &i, |
3285 | out_blk_size: CS_ETMV4_PRIV_MAX, |
3286 | CS_ETMV4_NR_TRC_PARAMS_V0); |
3287 | } else if (ptr[i] == __perf_cs_ete_magic) { |
3288 | metadata[j] = cs_etm__create_meta_blk(buff_in: ptr, buff_in_offset: &i, out_blk_size: CS_ETE_PRIV_MAX, nr_params_v0: -1); |
3289 | } else { |
3290 | ui__error("CS ETM Trace: Unrecognised magic number %#" PRIx64". File could be from a newer version of perf.\n" , |
3291 | ptr[i]); |
3292 | err = -EINVAL; |
3293 | goto err_free_metadata; |
3294 | } |
3295 | |
3296 | if (!metadata[j]) { |
3297 | err = -ENOMEM; |
3298 | goto err_free_metadata; |
3299 | } |
3300 | } |
3301 | |
3302 | /* |
3303 | * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and |
3304 | * CS_ETMV4_PRIV_MAX mark how many double words are in the |
3305 | * global metadata, and each cpu's metadata respectively. |
3306 | * The following tests if the correct number of double words was |
3307 | * present in the auxtrace info section. |
3308 | */ |
3309 | priv_size = total_size - event_header_size - INFO_HEADER_SIZE; |
3310 | if (i * 8 != priv_size) { |
3311 | err = -EINVAL; |
3312 | goto err_free_metadata; |
3313 | } |
3314 | |
3315 | etm = zalloc(sizeof(*etm)); |
3316 | |
3317 | if (!etm) { |
3318 | err = -ENOMEM; |
3319 | goto err_free_metadata; |
3320 | } |
3321 | |
3322 | /* |
3323 | * As all the ETMs run at the same exception level, the system should |
3324 | * have the same PID format crossing CPUs. So cache the PID format |
3325 | * and reuse it for sequential decoding. |
3326 | */ |
3327 | etm->pid_fmt = cs_etm__init_pid_fmt(metadata: metadata[0]); |
3328 | |
3329 | err = auxtrace_queues__init(&etm->queues); |
3330 | if (err) |
3331 | goto err_free_etm; |
3332 | |
3333 | if (session->itrace_synth_opts->set) { |
3334 | etm->synth_opts = *session->itrace_synth_opts; |
3335 | } else { |
3336 | itrace_synth_opts__set_default(&etm->synth_opts, |
3337 | session->itrace_synth_opts->default_no_sample); |
3338 | etm->synth_opts.callchain = false; |
3339 | } |
3340 | |
3341 | etm->session = session; |
3342 | |
3343 | etm->num_cpu = num_cpu; |
3344 | etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff); |
3345 | etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0); |
3346 | etm->metadata = metadata; |
3347 | etm->auxtrace_type = auxtrace_info->type; |
3348 | |
3349 | if (etm->synth_opts.use_timestamp) |
3350 | /* |
3351 | * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature, |
3352 | * therefore the decoder cannot know if the timestamp trace is |
3353 | * same with the kernel time. |
3354 | * |
3355 | * If a user has knowledge for the working platform and can |
3356 | * specify itrace option 'T' to tell decoder to forcely use the |
3357 | * traced timestamp as the kernel time. |
3358 | */ |
3359 | etm->has_virtual_ts = true; |
3360 | else |
3361 | /* Use virtual timestamps if all ETMs report ts_source = 1 */ |
3362 | etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); |
3363 | |
3364 | if (!etm->has_virtual_ts) |
3365 | ui__warning(format: "Virtual timestamps are not enabled, or not supported by the traced system.\n" |
3366 | "The time field of the samples will not be set accurately.\n" |
3367 | "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n" |
3368 | "you can specify the itrace option 'T' for timestamp decoding\n" |
3369 | "if the Coresight timestamp on the platform is same with the kernel time.\n\n" ); |
3370 | |
3371 | etm->auxtrace.process_event = cs_etm__process_event; |
3372 | etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; |
3373 | etm->auxtrace.flush_events = cs_etm__flush_events; |
3374 | etm->auxtrace.free_events = cs_etm__free_events; |
3375 | etm->auxtrace.free = cs_etm__free; |
3376 | etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; |
3377 | session->auxtrace = &etm->auxtrace; |
3378 | |
3379 | err = cs_etm__setup_timeless_decoding(etm); |
3380 | if (err) |
3381 | return err; |
3382 | |
3383 | etm->tc.time_shift = tc->time_shift; |
3384 | etm->tc.time_mult = tc->time_mult; |
3385 | etm->tc.time_zero = tc->time_zero; |
3386 | if (event_contains(*tc, time_cycles)) { |
3387 | etm->tc.time_cycles = tc->time_cycles; |
3388 | etm->tc.time_mask = tc->time_mask; |
3389 | etm->tc.cap_user_time_zero = tc->cap_user_time_zero; |
3390 | etm->tc.cap_user_time_short = tc->cap_user_time_short; |
3391 | } |
3392 | err = cs_etm__synth_events(etm, session); |
3393 | if (err) |
3394 | goto err_free_queues; |
3395 | |
3396 | /* |
3397 | * Map Trace ID values to CPU metadata. |
3398 | * |
3399 | * Trace metadata will always contain Trace ID values from the legacy algorithm. If the |
3400 | * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata |
3401 | * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set. |
3402 | * |
3403 | * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use |
3404 | * the same IDs as the old algorithm as far as is possible, unless there are clashes |
3405 | * in which case a different value will be used. This means an older perf may still |
3406 | * be able to record and read files generate on a newer system. |
3407 | * |
3408 | * For a perf able to interpret AUX_HW_ID packets we first check for the presence of |
3409 | * those packets. If they are there then the values will be mapped and plugged into |
3410 | * the metadata. We then set any remaining metadata values with the used flag to a |
3411 | * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required. |
3412 | * |
3413 | * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel |
3414 | * then we map Trace ID values to CPU directly from the metadata - clearing any unused |
3415 | * flags if present. |
3416 | */ |
3417 | |
3418 | /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ |
3419 | aux_hw_id_found = 0; |
3420 | err = perf_session__peek_events(session, offset: session->header.data_offset, |
3421 | size: session->header.data_size, |
3422 | cb: cs_etm__process_aux_hw_id_cb, data: &aux_hw_id_found); |
3423 | if (err) |
3424 | goto err_free_queues; |
3425 | |
3426 | /* if HW ID found then clear any unused metadata ID values */ |
3427 | if (aux_hw_id_found) |
3428 | err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata); |
3429 | /* otherwise, this is a file with metadata values only, map from metadata */ |
3430 | else |
3431 | err = cs_etm__map_trace_ids_metadata(num_cpu, metadata); |
3432 | |
3433 | if (err) |
3434 | goto err_free_queues; |
3435 | |
3436 | err = cs_etm__queue_aux_records(session); |
3437 | if (err) |
3438 | goto err_free_queues; |
3439 | |
3440 | etm->data_queued = etm->queues.populated; |
3441 | return 0; |
3442 | |
3443 | err_free_queues: |
3444 | auxtrace_queues__free(&etm->queues); |
3445 | session->auxtrace = NULL; |
3446 | err_free_etm: |
3447 | zfree(&etm); |
3448 | err_free_metadata: |
3449 | /* No need to check @metadata[j], free(NULL) is supported */ |
3450 | for (j = 0; j < num_cpu; j++) |
3451 | zfree(&metadata[j]); |
3452 | zfree(&metadata); |
3453 | err_free_traceid_list: |
3454 | intlist__delete(ilist: traceid_list); |
3455 | return err; |
3456 | } |
3457 | |