1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * builtin-inject.c |
4 | * |
5 | * Builtin inject command: Examine the live mode (stdin) event stream |
6 | * and repipe it to stdout while optionally injecting additional |
7 | * events into it. |
8 | */ |
9 | #include "builtin.h" |
10 | |
11 | #include "util/color.h" |
12 | #include "util/dso.h" |
13 | #include "util/vdso.h" |
14 | #include "util/evlist.h" |
15 | #include "util/evsel.h" |
16 | #include "util/map.h" |
17 | #include "util/session.h" |
18 | #include "util/tool.h" |
19 | #include "util/debug.h" |
20 | #include "util/build-id.h" |
21 | #include "util/data.h" |
22 | #include "util/auxtrace.h" |
23 | #include "util/jit.h" |
24 | #include "util/string2.h" |
25 | #include "util/symbol.h" |
26 | #include "util/synthetic-events.h" |
27 | #include "util/thread.h" |
28 | #include "util/namespaces.h" |
29 | #include "util/util.h" |
30 | #include "util/tsc.h" |
31 | |
32 | #include <internal/lib.h> |
33 | |
34 | #include <linux/err.h> |
35 | #include <subcmd/parse-options.h> |
36 | #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ |
37 | |
38 | #include <linux/list.h> |
39 | #include <linux/string.h> |
40 | #include <linux/zalloc.h> |
41 | #include <linux/hash.h> |
42 | #include <ctype.h> |
43 | #include <errno.h> |
44 | #include <signal.h> |
45 | #include <inttypes.h> |
46 | |
/* A buffered guest event together with its decoded sample. */
struct guest_event {
	struct perf_sample sample;	/* decoded view of 'event' */
	union perf_event *event;	/* raw event; presumably points into event_buf — confirm */
	char *event_buf;		/* owned storage for the raw event */
};
52 | |
/* Mapping from a guest sample id to the host id allocated for it. */
struct guest_id {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node node;
	u64 id;		/* sample id from the guest perf.data */
	u64 host_id;	/* replacement id, unused in the host evlist */
	u32 vcpu;	/* VCPU the guest id belongs to */
};
60 | |
/* Mapping from a QEMU thread id to the VCPU it runs. */
struct guest_tid {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node node;
	/* Thread ID of QEMU thread */
	u32 tid;
	u32 vcpu;
};
68 | |
/* Per-VCPU state discovered from the host event stream. */
struct guest_vcpu {
	/* Current host CPU */
	u32 cpu;
	/* Thread ID of QEMU thread */
	u32 tid;
};
75 | |
/*
 * State for merging a guest perf.data file into the host session.
 * Holds the guest session itself, id/tid remapping tables, and TSC
 * conversion parameters for translating guest timestamps.
 */
struct guest_session {
	char *perf_data_file;		/* path of the guest perf.data file */
	u32 machine_pid;		/* pid representing the guest machine */
	u64 time_offset;		/* guest->host time conversion — TODO confirm units */
	double time_scale;		/* guest->host time conversion factor */
	struct perf_tool tool;		/* callbacks used while reading guest data */
	struct perf_data data;
	struct perf_session *session;
	char *tmp_file_name;		/* temp file collecting converted guest events */
	int tmp_fd;			/* fd for tmp_file_name, see guest_session__output_bytes() */
	struct perf_tsc_conversion host_tc;
	struct perf_tsc_conversion guest_tc;
	bool copy_kcore_dir;
	bool have_tc;			/* host_tc/guest_tc are valid — TODO confirm */
	bool fetched;			/* stashed event 'ev' is valid — TODO confirm */
	bool ready;
	u16 dflt_id_hdr_size;
	u64 dflt_id;
	u64 highest_id;			/* highest id allocated, see guest_session__allocate_new_id() */
	/* Array of guest_vcpu */
	struct guest_vcpu *vcpu;
	size_t vcpu_cnt;
	/* Hash table for guest_id */
	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
	/* Hash table for guest_tid */
	struct hlist_head tids[PERF_EVLIST__HLIST_SIZE];
	/* Place to stash next guest event */
	struct guest_event ev;
};
105 | |
/* Global state of the inject command: options, output and bookkeeping. */
struct perf_inject {
	struct perf_tool tool;			/* event callbacks */
	struct perf_session *session;
	bool build_ids;				/* inject build ids for DSOs hit by samples */
	bool build_id_all;			/* inject build ids for all mmapped DSOs — confirm vs option parsing */
	bool sched_stat;			/* merge sched_stat into sched_switch events */
	bool have_auxtrace;			/* an auxtrace event was seen */
	bool strip;
	bool jit_mode;
	bool in_place_update;
	bool in_place_update_dry_run;
	bool is_pipe;				/* input is a pipe */
	bool copy_kcore_dir;
	const char *input_name;
	struct perf_data output;
	u64 bytes_written;			/* total bytes emitted via output_bytes() */
	u64 aux_id;				/* id of first AUX event, see perf_event__drop_aux() */
	struct list_head samples;		/* stashed sched_switch events (struct event_entry) */
	struct itrace_synth_opts itrace_synth_opts;
	char *event_copy;			/* scratch buffer for trimmed sample events */
	struct perf_file_section secs[HEADER_FEAT_BITS];
	struct guest_session guest_session;
	struct strlist *known_build_ids;	/* user-supplied "<buildid> <path>" entries */
};
130 | |
/* A stashed copy of an event, keyed by thread id (see inject->samples). */
struct event_entry {
	struct list_head node;
	u32 tid;
	union perf_event event[];	/* flexible array holding the raw event copy */
};
136 | |
/* Forward declaration: defined below, used by the mmap build-id handlers. */
static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
				struct machine *machine, u8 cpumode, u32 flags);
139 | |
140 | static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) |
141 | { |
142 | ssize_t size; |
143 | |
144 | size = perf_data__write(data: &inject->output, buf, size: sz); |
145 | if (size < 0) |
146 | return -errno; |
147 | |
148 | inject->bytes_written += size; |
149 | return 0; |
150 | } |
151 | |
152 | static int perf_event__repipe_synth(struct perf_tool *tool, |
153 | union perf_event *event) |
154 | { |
155 | struct perf_inject *inject = container_of(tool, struct perf_inject, |
156 | tool); |
157 | |
158 | return output_bytes(inject, buf: event, sz: event->header.size); |
159 | } |
160 | |
161 | static int perf_event__repipe_oe_synth(struct perf_tool *tool, |
162 | union perf_event *event, |
163 | struct ordered_events *oe __maybe_unused) |
164 | { |
165 | return perf_event__repipe_synth(tool, event); |
166 | } |
167 | |
#ifdef HAVE_JITDUMP
/* Only built with jitdump support: silently discard the event. */
static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
			       union perf_event *event __maybe_unused,
			       struct ordered_events *oe __maybe_unused)
{
	return 0;
}
#endif
176 | |
177 | static int perf_event__repipe_op2_synth(struct perf_session *session, |
178 | union perf_event *event) |
179 | { |
180 | return perf_event__repipe_synth(tool: session->tool, event); |
181 | } |
182 | |
183 | static int perf_event__repipe_op4_synth(struct perf_session *session, |
184 | union perf_event *event, |
185 | u64 data __maybe_unused, |
186 | const char *str __maybe_unused) |
187 | { |
188 | return perf_event__repipe_synth(tool: session->tool, event); |
189 | } |
190 | |
191 | static int perf_event__repipe_attr(struct perf_tool *tool, |
192 | union perf_event *event, |
193 | struct evlist **pevlist) |
194 | { |
195 | struct perf_inject *inject = container_of(tool, struct perf_inject, |
196 | tool); |
197 | int ret; |
198 | |
199 | ret = perf_event__process_attr(tool, event, pevlist); |
200 | if (ret) |
201 | return ret; |
202 | |
203 | if (!inject->is_pipe) |
204 | return 0; |
205 | |
206 | return perf_event__repipe_synth(tool, event); |
207 | } |
208 | |
209 | static int perf_event__repipe_event_update(struct perf_tool *tool, |
210 | union perf_event *event, |
211 | struct evlist **pevlist __maybe_unused) |
212 | { |
213 | return perf_event__repipe_synth(tool, event); |
214 | } |
215 | |
216 | #ifdef HAVE_AUXTRACE_SUPPORT |
217 | |
218 | static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size) |
219 | { |
220 | char buf[4096]; |
221 | ssize_t ssz; |
222 | int ret; |
223 | |
224 | while (size > 0) { |
225 | ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf))); |
226 | if (ssz < 0) |
227 | return -errno; |
228 | ret = output_bytes(inject, buf, ssz); |
229 | if (ret) |
230 | return ret; |
231 | size -= ssz; |
232 | } |
233 | |
234 | return 0; |
235 | } |
236 | |
/*
 * Repipe an auxtrace event and its trailing AUX data.
 *
 * When writing to a real file, the event's output offset is recorded in
 * the session's auxtrace index first.  The AUX data either follows the
 * event in the input stream (pipe input or no single mmap) and must be
 * copied through separately, or is contiguous with the event in the
 * mmapped input and can be written in one go.
 *
 * Returns the number of AUX data bytes consumed, or a negative error.
 */
static s64 perf_event__repipe_auxtrace(struct perf_session *session,
				       union perf_event *event)
{
	struct perf_tool *tool = session->tool;
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	int ret;

	inject->have_auxtrace = true;

	if (!inject->output.is_pipe) {
		off_t offset;

		/* Index the event at its position in the output file. */
		offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
		if (offset == -1)
			return -errno;
		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
						     event, offset);
		if (ret < 0)
			return ret;
	}

	if (perf_data__is_pipe(session->data) || !session->one_mmap) {
		/* AUX data follows the event in the input: copy it through. */
		ret = output_bytes(inject, event, event->header.size);
		if (ret < 0)
			return ret;
		ret = copy_bytes(inject, session->data,
				 event->auxtrace.size);
	} else {
		/* Single mmap: event and AUX data are contiguous. */
		ret = output_bytes(inject, event,
				   event->header.size + event->auxtrace.size);
	}
	if (ret < 0)
		return ret;

	return event->auxtrace.size;
}
274 | |
275 | #else |
276 | |
277 | static s64 |
278 | perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused, |
279 | union perf_event *event __maybe_unused) |
280 | { |
281 | pr_err("AUX area tracing not supported\n" ); |
282 | return -EINVAL; |
283 | } |
284 | |
285 | #endif |
286 | |
287 | static int perf_event__repipe(struct perf_tool *tool, |
288 | union perf_event *event, |
289 | struct perf_sample *sample __maybe_unused, |
290 | struct machine *machine __maybe_unused) |
291 | { |
292 | return perf_event__repipe_synth(tool, event); |
293 | } |
294 | |
295 | static int perf_event__drop(struct perf_tool *tool __maybe_unused, |
296 | union perf_event *event __maybe_unused, |
297 | struct perf_sample *sample __maybe_unused, |
298 | struct machine *machine __maybe_unused) |
299 | { |
300 | return 0; |
301 | } |
302 | |
303 | static int perf_event__drop_aux(struct perf_tool *tool, |
304 | union perf_event *event __maybe_unused, |
305 | struct perf_sample *sample, |
306 | struct machine *machine __maybe_unused) |
307 | { |
308 | struct perf_inject *inject = container_of(tool, struct perf_inject, tool); |
309 | |
310 | if (!inject->aux_id) |
311 | inject->aux_id = sample->id; |
312 | |
313 | return 0; |
314 | } |
315 | |
/*
 * Return a copy of @event with the PERF_SAMPLE_AUX data removed.
 *
 * sz1 covers the event up to and including the aux_sample size field;
 * sz2 is whatever follows the AUX data.  The size field (the u64 just
 * before the data) is zeroed in the copy.  Returns the trimmed copy, the
 * original event if the computed sizes look inconsistent, or
 * ERR_PTR(-ENOMEM) if the scratch buffer cannot be allocated.
 */
static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	size_t sz1 = sample->aux_sample.data - (void *)event;
	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
	union perf_event *ev;

	if (inject->event_copy == NULL) {
		/* Lazily allocate one scratch buffer, reused for all samples. */
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return ERR_PTR(error: -ENOMEM);
	}
	ev = (union perf_event *)inject->event_copy;
	/* Sanity-check the split before copying anything. */
	if (sz1 > event->header.size || sz2 > event->header.size ||
	    sz1 + sz2 > event->header.size ||
	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
		return event;

	memcpy(ev, event, sz1);
	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
	ev->header.size = sz1 + sz2;
	/* Zero the aux_sample size field in the copy. */
	((u64 *)((void *)ev + sz1))[-1] = 0;

	return ev;
}
343 | |
/*
 * Per-evsel sample handler installed via evsel->handler; dispatched from
 * perf_event__repipe_sample().
 */
typedef int (*inject_handler)(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample,
			      struct evsel *evsel,
			      struct machine *machine);
349 | |
/*
 * Repipe a sample event.  An evsel-specific handler takes precedence;
 * otherwise mark the sample's DSO as hit for build-id processing and,
 * when itrace synthesis is enabled, strip the embedded AUX data before
 * forwarding.
 */
static int perf_event__repipe_sample(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	if (evsel && evsel->handler) {
		inject_handler f = evsel->handler;
		return f(tool, event, sample, evsel, machine);
	}

	build_id__mark_dso_hit(tool, event, sample, evsel, machine);

	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
		/* Replace the event with a copy that has the AUX data cut out. */
		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
		if (IS_ERR(ptr: event))
			return PTR_ERR(ptr: event);
	}

	return perf_event__repipe_synth(tool, event);
}
374 | |
/* Process an mmap event, then forward it regardless of the result. */
static int perf_event__repipe_mmap(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_mmap(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
387 | |
#ifdef HAVE_JITDUMP
/*
 * If the mmap is a jitdump marker, synthesize mmap events and ELF images
 * for the jitted code instead of repiping the marker; otherwise fall back
 * to normal mmap handling.
 */
static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
				       union perf_event *event,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u64 injected_bytes = 0;
	int err;

	err = jit_process(inject->session, &inject->output, machine,
			  event->mmap.filename, event->mmap.pid,
			  event->mmap.tid, &injected_bytes);
	if (err < 0)
		return err;
	if (err > 0) {
		/* Jitdump was handled; account the synthesized bytes. */
		inject->bytes_written += injected_bytes;
		return 0;
	}
	return perf_event__repipe_mmap(tool, event, sample, machine);
}
#endif
412 | |
/*
 * Find or create the DSO for @filename in @machine for thread @pid/@tid,
 * attaching the thread's namespace info so later build-id reads can enter
 * the right mount namespace.  vdso maps are always resolved on the host,
 * so the need-setns flag is cleared for them.  Returns a DSO reference
 * (caller must dso__put()) or NULL.
 */
static struct dso *findnew_dso(int pid, int tid, const char *filename,
			       struct dso_id *id, struct machine *machine)
{
	struct thread *thread;
	struct nsinfo *nsi = NULL;
	struct nsinfo *nnsi;
	struct dso *dso;
	bool vdso;

	thread = machine__findnew_thread(machine, pid, tid);
	if (thread == NULL) {
		pr_err("cannot find or create a task %d/%d.\n" , tid, pid);
		return NULL;
	}

	vdso = is_vdso_map(filename);
	nsi = nsinfo__get(nsi: thread__nsinfo(thread));

	if (vdso) {
		/* The vdso maps are always on the host and not the
		 * container. Ensure that we don't use setns to look
		 * them up.
		 */
		nnsi = nsinfo__copy(nsi);
		if (nnsi) {
			nsinfo__put(nsi);
			nsinfo__clear_need_setns(nsi: nnsi);
			nsi = nnsi;
		}
		dso = machine__findnew_vdso(machine, thread);
	} else {
		dso = machine__findnew_dso_id(machine, filename, id);
	}

	if (dso) {
		/* Hand our nsinfo reference to the dso, dropping any old one. */
		mutex_lock(&dso->lock);
		nsinfo__put(nsi: dso->nsinfo);
		dso->nsinfo = nsi;
		mutex_unlock(lock: &dso->lock);
	} else
		nsinfo__put(nsi);

	thread__put(thread);
	return dso;
}
458 | |
/*
 * mmap handler that injects a build-id event for the mapped DSO the first
 * time the DSO is seen, then repipes the mmap event.
 */
static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
					   union perf_event *event,
					   struct perf_sample *sample,
					   struct machine *machine)
{
	struct dso *dso;

	dso = findnew_dso(pid: event->mmap.pid, tid: event->mmap.tid,
			  filename: event->mmap.filename, NULL, machine);

	if (dso && !dso->hit) {
		/* Inject the build id only once per DSO. */
		dso->hit = 1;
		dso__inject_build_id(dso, tool, machine, cpumode: sample->cpumode, flags: 0);
	}
	dso__put(dso);

	return perf_event__repipe(tool, event, sample, machine);
}
477 | |
/*
 * Process and repipe an mmap2 event.  If the event already carries a
 * build id (PERF_RECORD_MISC_MMAP_BUILD_ID), mark the DSO as hit so no
 * separate build-id event is injected for it later.
 */
static int perf_event__repipe_mmap2(struct perf_tool *tool,
				    union perf_event *event,
				    struct perf_sample *sample,
				    struct machine *machine)
{
	int err;

	err = perf_event__process_mmap2(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		struct dso *dso;

		dso = findnew_dso(pid: event->mmap2.pid, tid: event->mmap2.tid,
				  filename: event->mmap2.filename, NULL, machine);
		if (dso) {
			/* mark it not to inject build-id */
			dso->hit = 1;
		}
		dso__put(dso);
	}

	return err;
}
502 | |
#ifdef HAVE_JITDUMP
/*
 * mmap2 twin of perf_event__jit_repipe_mmap(): handle jitdump markers by
 * synthesizing jitted-code events, otherwise do normal mmap2 handling.
 */
static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
					union perf_event *event,
					struct perf_sample *sample,
					struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u64 injected_bytes = 0;
	int err;

	err = jit_process(inject->session, &inject->output, machine,
			  event->mmap2.filename, event->mmap2.pid,
			  event->mmap2.tid, &injected_bytes);
	if (err < 0)
		return err;
	if (err > 0) {
		/* Jitdump was handled; account the synthesized bytes. */
		inject->bytes_written += injected_bytes;
		return 0;
	}
	return perf_event__repipe_mmap2(tool, event, sample, machine);
}
#endif
527 | |
/*
 * mmap2 handler that injects a build-id event for the mapped DSO on first
 * sight.  Events that already carry a build id are only marked as hit and
 * repiped; for them the maj/min/ino fields hold build-id bytes, so dso_id
 * must not be used.
 */
static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample,
					    struct machine *machine)
{
	struct dso_id dso_id = {
		.maj = event->mmap2.maj,
		.min = event->mmap2.min,
		.ino = event->mmap2.ino,
		.ino_generation = event->mmap2.ino_generation,
	};
	struct dso *dso;

	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		/* cannot use dso_id since it'd have invalid info */
		dso = findnew_dso(pid: event->mmap2.pid, tid: event->mmap2.tid,
				  filename: event->mmap2.filename, NULL, machine);
		if (dso) {
			/* mark it not to inject build-id */
			dso->hit = 1;
		}
		dso__put(dso);
		perf_event__repipe(tool, event, sample, machine);
		return 0;
	}

	dso = findnew_dso(pid: event->mmap2.pid, tid: event->mmap2.tid,
			  filename: event->mmap2.filename, id: &dso_id, machine);

	if (dso && !dso->hit) {
		/* Inject the build id only once per DSO. */
		dso->hit = 1;
		dso__inject_build_id(dso, tool, machine, cpumode: sample->cpumode,
				     flags: event->mmap2.flags);
	}
	dso__put(dso);

	perf_event__repipe(tool, event, sample, machine);

	return 0;
}
568 | |
/* Process a fork event, then forward it regardless of the result. */
static int perf_event__repipe_fork(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_fork(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
581 | |
/* Process a comm event, then forward it regardless of the result. */
static int perf_event__repipe_comm(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_comm(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
594 | |
/* Process a namespaces event, then forward it regardless of the result. */
static int perf_event__repipe_namespaces(struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int err = perf_event__process_namespaces(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
606 | |
/* Process an exit event, then forward it regardless of the result. */
static int perf_event__repipe_exit(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_exit(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
619 | |
#ifdef HAVE_LIBTRACEEVENT
/* Forward the raw tracing-data event first, then let the session consume it. */
static int perf_event__repipe_tracing_data(struct perf_session *session,
					   union perf_event *event)
{
	int err;

	perf_event__repipe_synth(session->tool, event);
	err = perf_event__process_tracing_data(session, event);

	return err;
}
#endif
629 | |
/*
 * Read the DSO's build id from its file on disk, entering the DSO's mount
 * namespace first and retrying with a chroot-prefixed path if the plain
 * path fails.  Returns 0 when dso->bid is valid, -1 otherwise.
 */
static int dso__read_build_id(struct dso *dso)
{
	struct nscookie nsc;

	if (dso->has_build_id)
		return 0;

	mutex_lock(&dso->lock);
	nsinfo__mountns_enter(nsi: dso->nsinfo, nc: &nsc);
	if (filename__read_build_id(filename: dso->long_name, id: &dso->bid) > 0)
		dso->has_build_id = true;
	else if (dso->nsinfo) {
		/* Retry with the path as visible from outside the namespace. */
		char *new_name = dso__filename_with_chroot(dso, filename: dso->long_name);

		if (new_name && filename__read_build_id(filename: new_name, id: &dso->bid) > 0)
			dso->has_build_id = true;
		free(new_name);
	}
	nsinfo__mountns_exit(nc: &nsc);
	mutex_unlock(lock: &dso->lock);

	return dso->has_build_id ? 0 : -1;
}
653 | |
/*
 * Parse a user-supplied list of "<hex build id> <dso path>" entries into
 * a strlist.  Entries without a path, with an odd-length or over-long
 * build id, or with non-hex digits are removed.  Returns NULL on
 * allocation failure.
 */
static struct strlist *perf_inject__parse_known_build_ids(
	const char *known_build_ids_string)
{
	struct str_node *pos, *tmp;
	struct strlist *known_build_ids;
	int bid_len;

	known_build_ids = strlist__new(slist: known_build_ids_string, NULL);
	if (known_build_ids == NULL)
		return NULL;
	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		if (dso_name == NULL) {
			/* No path after the build id: drop the entry. */
			strlist__remove(slist: known_build_ids, sn: pos);
			continue;
		}
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
			/* Build id must be an even number of hex digits that fits. */
			strlist__remove(slist: known_build_ids, sn: pos);
			continue;
		}
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			if (!isxdigit(build_id[2 * ix]) ||
			    !isxdigit(build_id[2 * ix + 1])) {
				strlist__remove(slist: known_build_ids, sn: pos);
				break;
			}
		}
	}
	return known_build_ids;
}
689 | |
/*
 * Look @dso up in the known-build-ids list; on a path match, decode the
 * hex build id into dso->bid.  Every entry is assumed to contain a space
 * (guaranteed by perf_inject__parse_known_build_ids()).  Returns true if
 * the DSO's build id was set from the list.
 */
static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
					       struct dso *dso)
{
	struct str_node *pos;
	int bid_len;

	strlist__for_each_entry(pos, inject->known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		if (strcmp(dso->long_name, dso_name))
			continue;
		/* Decode two hex digits per build-id byte. */
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			dso->bid.data[ix] = (hex(c: build_id[2 * ix]) << 4 |
					     hex(c: build_id[2 * ix + 1]));
		}
		dso->bid.size = bid_len / 2;
		dso->has_build_id = 1;
		return true;
	}
	return false;
}
715 | |
/*
 * Synthesize and repipe a build-id event for @dso.  Anonymous, hugetlb
 * and pseudo (no-DSO) mappings are skipped.  A user-supplied known build
 * id takes precedence over reading it from the file.  Returns 0 on
 * success, 1 when a known build id was used, -1 on failure.
 */
static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
				struct machine *machine, u8 cpumode, u32 flags)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	int err;

	if (is_anon_memory(filename: dso->long_name) || flags & MAP_HUGETLB)
		return 0;
	if (is_no_dso_memory(filename: dso->long_name))
		return 0;

	if (inject->known_build_ids != NULL &&
	    perf_inject__lookup_known_build_id(inject, dso))
		return 1;

	if (dso__read_build_id(dso) < 0) {
		pr_debug("no build_id found for %s\n" , dso->long_name);
		return -1;
	}

	err = perf_event__synthesize_build_id(tool, pos: dso, misc: cpumode,
					      process: perf_event__repipe, machine);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n" , dso->long_name);
		return -1;
	}

	return 0;
}
746 | |
/*
 * Sample handler used while injecting build ids: resolve the sample's ip
 * to a map and inject the build id for its DSO the first time it is hit.
 * The original event is always repiped and 0 is returned, so one
 * unresolvable sample does not abort the run.
 */
int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
			       struct perf_sample *sample,
			       struct evsel *evsel __maybe_unused,
			       struct machine *machine)
{
	struct addr_location al;
	struct thread *thread;

	addr_location__init(al: &al);
	thread = machine__findnew_thread(machine, pid: sample->pid, tid: sample->tid);
	if (thread == NULL) {
		pr_err("problem processing %d event, skipping it.\n" ,
		       event->header.type);
		goto repipe;
	}

	if (thread__find_map(thread, cpumode: sample->cpumode, addr: sample->ip, al: &al)) {
		struct dso *dso = map__dso(map: al.map);

		if (!dso->hit) {
			/* Inject the build id only once per DSO. */
			dso->hit = 1;
			dso__inject_build_id(dso, tool, machine,
					     cpumode: sample->cpumode, flags: map__flags(map: al.map));
		}
	}

	thread__put(thread);
repipe:
	perf_event__repipe(tool, event, sample, machine);
	addr_location__exit(al: &al);
	return 0;
}
779 | |
780 | static int perf_inject__sched_process_exit(struct perf_tool *tool, |
781 | union perf_event *event __maybe_unused, |
782 | struct perf_sample *sample, |
783 | struct evsel *evsel __maybe_unused, |
784 | struct machine *machine __maybe_unused) |
785 | { |
786 | struct perf_inject *inject = container_of(tool, struct perf_inject, tool); |
787 | struct event_entry *ent; |
788 | |
789 | list_for_each_entry(ent, &inject->samples, node) { |
790 | if (sample->tid == ent->tid) { |
791 | list_del_init(entry: &ent->node); |
792 | free(ent); |
793 | break; |
794 | } |
795 | } |
796 | |
797 | return 0; |
798 | } |
799 | |
800 | static int perf_inject__sched_switch(struct perf_tool *tool, |
801 | union perf_event *event, |
802 | struct perf_sample *sample, |
803 | struct evsel *evsel, |
804 | struct machine *machine) |
805 | { |
806 | struct perf_inject *inject = container_of(tool, struct perf_inject, tool); |
807 | struct event_entry *ent; |
808 | |
809 | perf_inject__sched_process_exit(tool, event, sample, evsel, machine); |
810 | |
811 | ent = malloc(event->header.size + sizeof(struct event_entry)); |
812 | if (ent == NULL) { |
813 | color_fprintf(stderr, PERF_COLOR_RED, |
814 | "Not enough memory to process sched switch event!" ); |
815 | return -1; |
816 | } |
817 | |
818 | ent->tid = sample->tid; |
819 | memcpy(&ent->event, event, event->header.size); |
820 | list_add(new: &ent->node, head: &inject->samples); |
821 | return 0; |
822 | } |
823 | |
#ifdef HAVE_LIBTRACEEVENT
/*
 * sched_stat handler: find the stashed sched_switch event for the pid in
 * this stat sample, rewrite the switch sample's period/time from the stat
 * sample, and repipe the switch event in its place.  If no stashed event
 * exists, nothing is emitted.
 */
static int perf_inject__sched_stat(struct perf_tool *tool,
				   union perf_event *event __maybe_unused,
				   struct perf_sample *sample,
				   struct evsel *evsel,
				   struct machine *machine)
{
	struct event_entry *ent;
	union perf_event *event_sw;
	struct perf_sample sample_sw;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u32 pid = evsel__intval(evsel, sample, "pid" );

	list_for_each_entry(ent, &inject->samples, node) {
		if (pid == ent->tid)
			goto found;
	}

	return 0;
found:
	event_sw = &ent->event[0];
	evsel__parse_sample(evsel, event_sw, &sample_sw);

	/* Carry the stat sample's period and timestamp over to the switch. */
	sample_sw.period = sample->period;
	sample_sw.time = sample->time;
	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
				      evsel->core.attr.read_format, &sample_sw);
	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
#endif
855 | |
/*
 * Return the guest_vcpu slot for @vcpu, growing the vcpu array on demand.
 * Returns NULL on allocation failure.
 */
static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
{
	if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
		return NULL;
	return &gs->vcpu[vcpu];
}
862 | |
863 | static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz) |
864 | { |
865 | ssize_t ret = writen(gs->tmp_fd, buf, sz); |
866 | |
867 | return ret < 0 ? ret : 0; |
868 | } |
869 | |
870 | static int guest_session__repipe(struct perf_tool *tool, |
871 | union perf_event *event, |
872 | struct perf_sample *sample __maybe_unused, |
873 | struct machine *machine __maybe_unused) |
874 | { |
875 | struct guest_session *gs = container_of(tool, struct guest_session, tool); |
876 | |
877 | return guest_session__output_bytes(gs, buf: event, sz: event->header.size); |
878 | } |
879 | |
880 | static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu) |
881 | { |
882 | struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid)); |
883 | int hash; |
884 | |
885 | if (!guest_tid) |
886 | return -ENOMEM; |
887 | |
888 | guest_tid->tid = tid; |
889 | guest_tid->vcpu = vcpu; |
890 | hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS); |
891 | hlist_add_head(n: &guest_tid->node, h: &gs->tids[hash]); |
892 | |
893 | return 0; |
894 | } |
895 | |
/*
 * Callback for perf_session__peek_events(): detect QEMU VCPU threads from
 * comm events of the guest machine's pid.  Thread names of the form
 * "CPU <n>/KVM" (QEMU's -name debug-threads=on convention) identify which
 * host thread runs which VCPU; each match is recorded in both the vcpu
 * array and the tid hash.
 */
static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
				 union perf_event *event,
				 u64 offset __maybe_unused, void *data)
{
	struct guest_session *gs = data;
	unsigned int vcpu;
	struct guest_vcpu *guest_vcpu;
	int ret;

	if (event->header.type != PERF_RECORD_COMM ||
	    event->comm.pid != gs->machine_pid)
		return 0;

	/*
	 * QEMU option -name debug-threads=on, causes thread names formatted as
	 * below, although it is not an ABI. Also libvirt seems to use this by
	 * default. Here we rely on it to tell us which thread is which VCPU.
	 */
	ret = sscanf(event->comm.comm, "CPU %u/KVM" , &vcpu);
	if (ret <= 0)
		return ret;
	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n" ,
		 event->comm.tid, event->comm.comm, vcpu);
	if (vcpu > INT_MAX) {
		pr_err("Invalid VCPU %u\n" , vcpu);
		return -EINVAL;
	}
	guest_vcpu = guest_session__vcpu(gs, vcpu);
	if (!guest_vcpu)
		return -ENOMEM;
	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
		/* A VCPU must map to exactly one host thread. */
		pr_err("Fatal error: Two threads found with the same VCPU\n" );
		return -EINVAL;
	}
	guest_vcpu->tid = event->comm.tid;

	return guest_session__map_tid(gs, tid: event->comm.tid, vcpu);
}
934 | |
935 | static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs) |
936 | { |
937 | return perf_session__peek_events(session, offset: session->header.data_offset, |
938 | size: session->header.data_size, |
939 | cb: host_peek_vm_comms_cb, data: gs); |
940 | } |
941 | |
942 | static bool evlist__is_id_used(struct evlist *evlist, u64 id) |
943 | { |
944 | return evlist__id2sid(evlist, id); |
945 | } |
946 | |
947 | static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist) |
948 | { |
949 | do { |
950 | gs->highest_id += 1; |
951 | } while (!gs->highest_id || evlist__is_id_used(evlist: host_evlist, id: gs->highest_id)); |
952 | |
953 | return gs->highest_id; |
954 | } |
955 | |
956 | static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu) |
957 | { |
958 | struct guest_id *guest_id = zalloc(sizeof(*guest_id)); |
959 | int hash; |
960 | |
961 | if (!guest_id) |
962 | return -ENOMEM; |
963 | |
964 | guest_id->id = id; |
965 | guest_id->host_id = host_id; |
966 | guest_id->vcpu = vcpu; |
967 | hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS); |
968 | hlist_add_head(n: &guest_id->node, h: &gs->heads[hash]); |
969 | |
970 | return 0; |
971 | } |
972 | |
973 | static u64 evlist__find_highest_id(struct evlist *evlist) |
974 | { |
975 | struct evsel *evsel; |
976 | u64 highest_id = 1; |
977 | |
978 | evlist__for_each_entry(evlist, evsel) { |
979 | u32 j; |
980 | |
981 | for (j = 0; j < evsel->core.ids; j++) { |
982 | u64 id = evsel->core.id[j]; |
983 | |
984 | if (id > highest_id) |
985 | highest_id = id; |
986 | } |
987 | } |
988 | |
989 | return highest_id; |
990 | } |
991 | |
/*
 * For every per-cpu sample id in the guest evlist, allocate a fresh id
 * unused in the host evlist and record the guest->host mapping together
 * with the VCPU (the guest id's cpu).  Ids without a cpu are skipped.
 */
static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			struct perf_sample_id *sid;
			u64 host_id;
			u64 id;

			id = evsel->core.id[j];
			sid = evlist__id2sid(evlist, id);
			if (!sid || sid->cpu.cpu == -1)
				continue;
			host_id = guest_session__allocate_new_id(gs, host_evlist);
			ret = guest_session__map_id(gs, id, host_id, vcpu: sid->cpu.cpu);
			if (ret)
				return ret;
		}
	}

	return 0;
}
1019 | |
1020 | static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id) |
1021 | { |
1022 | struct hlist_head *head; |
1023 | struct guest_id *guest_id; |
1024 | int hash; |
1025 | |
1026 | hash = hash_64(id, PERF_EVLIST__HLIST_BITS); |
1027 | head = &gs->heads[hash]; |
1028 | |
1029 | hlist_for_each_entry(guest_id, head, node) |
1030 | if (guest_id->id == id) |
1031 | return guest_id; |
1032 | |
1033 | return NULL; |
1034 | } |
1035 | |
1036 | static int process_attr(struct perf_tool *tool, union perf_event *event, |
1037 | struct perf_sample *sample __maybe_unused, |
1038 | struct machine *machine __maybe_unused) |
1039 | { |
1040 | struct perf_inject *inject = container_of(tool, struct perf_inject, tool); |
1041 | |
1042 | return perf_event__process_attr(tool, event, pevlist: &inject->session->evlist); |
1043 | } |
1044 | |
1045 | static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel) |
1046 | { |
1047 | struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); |
1048 | struct perf_event_attr attr = evsel->core.attr; |
1049 | u64 *id_array; |
1050 | u32 *vcpu_array; |
1051 | int ret = -ENOMEM; |
1052 | u32 i; |
1053 | |
1054 | id_array = calloc(evsel->core.ids, sizeof(*id_array)); |
1055 | if (!id_array) |
1056 | return -ENOMEM; |
1057 | |
1058 | vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array)); |
1059 | if (!vcpu_array) |
1060 | goto out; |
1061 | |
1062 | for (i = 0; i < evsel->core.ids; i++) { |
1063 | u64 id = evsel->core.id[i]; |
1064 | struct guest_id *guest_id = guest_session__lookup_id(gs, id); |
1065 | |
1066 | if (!guest_id) { |
1067 | pr_err("Failed to find guest id %" PRIu64"\n" , id); |
1068 | ret = -EINVAL; |
1069 | goto out; |
1070 | } |
1071 | id_array[i] = guest_id->host_id; |
1072 | vcpu_array[i] = guest_id->vcpu; |
1073 | } |
1074 | |
1075 | attr.sample_type |= PERF_SAMPLE_IDENTIFIER; |
1076 | attr.exclude_host = 1; |
1077 | attr.exclude_guest = 0; |
1078 | |
1079 | ret = perf_event__synthesize_attr(tool: &inject->tool, attr: &attr, ids: evsel->core.ids, |
1080 | id: id_array, process: process_attr); |
1081 | if (ret) |
1082 | pr_err("Failed to add guest attr.\n" ); |
1083 | |
1084 | for (i = 0; i < evsel->core.ids; i++) { |
1085 | struct perf_sample_id *sid; |
1086 | u32 vcpu = vcpu_array[i]; |
1087 | |
1088 | sid = evlist__id2sid(evlist: inject->session->evlist, id: id_array[i]); |
1089 | /* Guest event is per-thread from the host point of view */ |
1090 | sid->cpu.cpu = -1; |
1091 | sid->tid = gs->vcpu[vcpu].tid; |
1092 | sid->machine_pid = gs->machine_pid; |
1093 | sid->vcpu.cpu = vcpu; |
1094 | } |
1095 | out: |
1096 | free(vcpu_array); |
1097 | free(id_array); |
1098 | return ret; |
1099 | } |
1100 | |
1101 | static int guest_session__add_attrs(struct guest_session *gs) |
1102 | { |
1103 | struct evlist *evlist = gs->session->evlist; |
1104 | struct evsel *evsel; |
1105 | int ret; |
1106 | |
1107 | evlist__for_each_entry(evlist, evsel) { |
1108 | ret = guest_session__add_attr(gs, evsel); |
1109 | if (ret) |
1110 | return ret; |
1111 | } |
1112 | |
1113 | return 0; |
1114 | } |
1115 | |
1116 | static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt) |
1117 | { |
1118 | struct perf_session *session = inject->session; |
1119 | struct evlist *evlist = session->evlist; |
1120 | struct machine *machine = &session->machines.host; |
1121 | size_t from = evlist->core.nr_entries - new_cnt; |
1122 | |
1123 | return __perf_event__synthesize_id_index(tool: &inject->tool, process: perf_event__repipe, |
1124 | evlist, machine, from); |
1125 | } |
1126 | |
1127 | static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid) |
1128 | { |
1129 | struct hlist_head *head; |
1130 | struct guest_tid *guest_tid; |
1131 | int hash; |
1132 | |
1133 | hash = hash_32(tid, PERF_EVLIST__HLIST_BITS); |
1134 | head = &gs->tids[hash]; |
1135 | |
1136 | hlist_for_each_entry(guest_tid, head, node) |
1137 | if (guest_tid->tid == tid) |
1138 | return guest_tid; |
1139 | |
1140 | return NULL; |
1141 | } |
1142 | |
1143 | static bool dso__is_in_kernel_space(struct dso *dso) |
1144 | { |
1145 | if (dso__is_vdso(dso)) |
1146 | return false; |
1147 | |
1148 | return dso__is_kcore(dso) || |
1149 | dso->kernel || |
1150 | is_kernel_module(pathname: dso->long_name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); |
1151 | } |
1152 | |
1153 | static u64 evlist__first_id(struct evlist *evlist) |
1154 | { |
1155 | struct evsel *evsel; |
1156 | |
1157 | evlist__for_each_entry(evlist, evsel) { |
1158 | if (evsel->core.ids) |
1159 | return evsel->core.id[0]; |
1160 | } |
1161 | return 0; |
1162 | } |
1163 | |
1164 | static int process_build_id(struct perf_tool *tool, |
1165 | union perf_event *event, |
1166 | struct perf_sample *sample __maybe_unused, |
1167 | struct machine *machine __maybe_unused) |
1168 | { |
1169 | struct perf_inject *inject = container_of(tool, struct perf_inject, tool); |
1170 | |
1171 | return perf_event__process_build_id(session: inject->session, event); |
1172 | } |
1173 | |
1174 | static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid) |
1175 | { |
1176 | struct machine *machine = perf_session__findnew_machine(session: inject->session, pid: machine_pid); |
1177 | u8 cpumode = dso__is_in_kernel_space(dso) ? |
1178 | PERF_RECORD_MISC_GUEST_KERNEL : |
1179 | PERF_RECORD_MISC_GUEST_USER; |
1180 | |
1181 | if (!machine) |
1182 | return -ENOMEM; |
1183 | |
1184 | dso->hit = 1; |
1185 | |
1186 | return perf_event__synthesize_build_id(tool: &inject->tool, pos: dso, misc: cpumode, |
1187 | process: process_build_id, machine); |
1188 | } |
1189 | |
1190 | static int guest_session__add_build_ids(struct guest_session *gs) |
1191 | { |
1192 | struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); |
1193 | struct machine *machine = &gs->session->machines.host; |
1194 | struct dso *dso; |
1195 | int ret; |
1196 | |
1197 | /* Build IDs will be put in the Build ID feature section */ |
1198 | perf_header__set_feat(header: &inject->session->header, feat: HEADER_BUILD_ID); |
1199 | |
1200 | dsos__for_each_with_build_id(dso, &machine->dsos.head) { |
1201 | ret = synthesize_build_id(inject, dso, machine_pid: gs->machine_pid); |
1202 | if (ret) |
1203 | return ret; |
1204 | } |
1205 | |
1206 | return 0; |
1207 | } |
1208 | |
1209 | static int guest_session__ksymbol_event(struct perf_tool *tool, |
1210 | union perf_event *event, |
1211 | struct perf_sample *sample __maybe_unused, |
1212 | struct machine *machine __maybe_unused) |
1213 | { |
1214 | struct guest_session *gs = container_of(tool, struct guest_session, tool); |
1215 | |
1216 | /* Only support out-of-line i.e. no BPF support */ |
1217 | if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL) |
1218 | return 0; |
1219 | |
1220 | return guest_session__output_bytes(gs, buf: event, sz: event->header.size); |
1221 | } |
1222 | |
/*
 * Open the guest perf.data file @name and fully process its events,
 * writing them to a temporary file for later injection into the host
 * stream.  Returns 0 on success or a negative error code; partially
 * acquired resources are released by guest_session__exit().
 */
static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX" ;
	struct perf_session *session;
	int ret;

	/* Only these events will be injected */
	gs->tool.mmap = guest_session__repipe;
	gs->tool.mmap2 = guest_session__repipe;
	gs->tool.comm = guest_session__repipe;
	gs->tool.fork = guest_session__repipe;
	gs->tool.exit = guest_session__repipe;
	gs->tool.lost = guest_session__repipe;
	gs->tool.context_switch = guest_session__repipe;
	gs->tool.ksymbol = guest_session__ksymbol_event;
	gs->tool.text_poke = guest_session__repipe;
	/*
	 * Processing a build ID creates a struct dso with that build ID. Later,
	 * all guest dsos are iterated and the build IDs processed into the host
	 * session where they will be output to the Build ID feature section
	 * when the perf.data file header is written.
	 */
	gs->tool.build_id = perf_event__process_build_id;
	/* Process the id index to know what VCPU an ID belongs to */
	gs->tool.id_index = perf_event__process_id_index;

	gs->tool.ordered_events = true;
	gs->tool.ordering_requires_timestamps = true;

	gs->data.path = name;
	gs->data.force = force;
	gs->data.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(data: &gs->data, tool: &gs->tool);
	if (IS_ERR(ptr: session))
		return PTR_ERR(ptr: session);
	gs->session = session;

	/*
	 * Initial events have zero'd ID samples. Get default ID sample size
	 * used for removing them.
	 */
	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
	/* And default ID for adding back a host-compatible ID sample */
	gs->dflt_id = evlist__first_id(evlist: session->evlist);
	if (!gs->dflt_id) {
		pr_err("Guest data has no sample IDs" );
		return -EINVAL;
	}

	/* Temporary file for guest events */
	gs->tmp_file_name = strdup(tmp_file_name);
	if (!gs->tmp_file_name)
		return -ENOMEM;
	/* mkstemp() replaces the XXXXXX template in gs->tmp_file_name */
	gs->tmp_fd = mkstemp(gs->tmp_file_name);
	if (gs->tmp_fd < 0)
		return -errno;

	/* Non-fatal: compressed guest data just won't be decompressed */
	if (zstd_init(&gs->session->zstd_data, 0) < 0)
		pr_warning("Guest session decompression initialization failed.\n" );

	/*
	 * perf does not support processing 2 sessions simultaneously, so output
	 * guest events to a temporary file.
	 */
	ret = perf_session__process_events(session: gs->session);
	if (ret)
		return ret;

	/* Rewind so that injection starts reading from the first guest event */
	if (lseek(gs->tmp_fd, 0, SEEK_SET))
		return -errno;

	return 0;
}
1297 | |
1298 | /* Free hlist nodes assuming hlist_node is the first member of hlist entries */ |
1299 | static void free_hlist(struct hlist_head *heads, size_t hlist_sz) |
1300 | { |
1301 | struct hlist_node *pos, *n; |
1302 | size_t i; |
1303 | |
1304 | for (i = 0; i < hlist_sz; ++i) { |
1305 | hlist_for_each_safe(pos, n, &heads[i]) { |
1306 | hlist_del(n: pos); |
1307 | free(pos); |
1308 | } |
1309 | } |
1310 | } |
1311 | |
/*
 * Tear down a guest session: delete the perf session, free the ID and
 * TID hash tables, close and remove the temporary guest event file, and
 * release remaining allocations.  Safe on a partially started session.
 */
static void guest_session__exit(struct guest_session *gs)
{
	if (gs->session) {
		perf_session__delete(session: gs->session);
		/* Entries have hlist_node first, as free_hlist() requires */
		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
	}
	if (gs->tmp_file_name) {
		if (gs->tmp_fd >= 0)
			close(gs->tmp_fd);
		unlink(gs->tmp_file_name);
		zfree(&gs->tmp_file_name);
	}
	zfree(&gs->vcpu);
	zfree(&gs->perf_data_file);
}
1328 | |
/*
 * Copy TSC conversion parameters from a PERF_RECORD_TIME_CONV event into
 * a perf_tsc_conversion for use with perf_time_to_tsc()/tsc_to_perf_time().
 */
static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
	tc->time_shift = time_conv->time_shift;
	tc->time_mult = time_conv->time_mult;
	tc->time_zero = time_conv->time_zero;
	tc->time_cycles = time_conv->time_cycles;
	tc->time_mask = time_conv->time_mask;
	tc->cap_user_time_zero = time_conv->cap_user_time_zero;
	tc->cap_user_time_short = time_conv->cap_user_time_short;
}
1339 | |
1340 | static void guest_session__get_tc(struct guest_session *gs) |
1341 | { |
1342 | struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); |
1343 | |
1344 | get_tsc_conv(tc: &gs->host_tc, time_conv: &inject->session->time_conv); |
1345 | get_tsc_conv(tc: &gs->guest_tc, time_conv: &gs->session->time_conv); |
1346 | } |
1347 | |
1348 | static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time) |
1349 | { |
1350 | u64 tsc; |
1351 | |
1352 | if (!guest_time) { |
1353 | *host_time = 0; |
1354 | return; |
1355 | } |
1356 | |
1357 | if (gs->guest_tc.cap_user_time_zero) |
1358 | tsc = perf_time_to_tsc(ns: guest_time, tc: &gs->guest_tc); |
1359 | else |
1360 | tsc = guest_time; |
1361 | |
1362 | /* |
1363 | * This is the correct order of operations for x86 if the TSC Offset and |
1364 | * Multiplier values are used. |
1365 | */ |
1366 | tsc -= gs->time_offset; |
1367 | tsc /= gs->time_scale; |
1368 | |
1369 | if (gs->host_tc.cap_user_time_zero) |
1370 | *host_time = tsc_to_perf_time(cyc: tsc, tc: &gs->host_tc); |
1371 | else |
1372 | *host_time = tsc; |
1373 | } |
1374 | |
1375 | static int guest_session__fetch(struct guest_session *gs) |
1376 | { |
1377 | void *buf; |
1378 | struct perf_event_header *hdr; |
1379 | size_t hdr_sz = sizeof(*hdr); |
1380 | ssize_t ret; |
1381 | |
1382 | buf = gs->ev.event_buf; |
1383 | if (!buf) { |
1384 | buf = malloc(PERF_SAMPLE_MAX_SIZE); |
1385 | if (!buf) |
1386 | return -ENOMEM; |
1387 | gs->ev.event_buf = buf; |
1388 | } |
1389 | hdr = buf; |
1390 | ret = readn(gs->tmp_fd, buf, hdr_sz); |
1391 | if (ret < 0) |
1392 | return ret; |
1393 | |
1394 | if (!ret) { |
1395 | /* Zero size means EOF */ |
1396 | hdr->size = 0; |
1397 | return 0; |
1398 | } |
1399 | |
1400 | buf += hdr_sz; |
1401 | |
1402 | ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz); |
1403 | if (ret < 0) |
1404 | return ret; |
1405 | |
1406 | gs->ev.event = (union perf_event *)gs->ev.event_buf; |
1407 | gs->ev.sample.time = 0; |
1408 | |
1409 | if (hdr->type >= PERF_RECORD_USER_TYPE_START) { |
1410 | pr_err("Unexpected type fetching guest event" ); |
1411 | return 0; |
1412 | } |
1413 | |
1414 | ret = evlist__parse_sample(evlist: gs->session->evlist, event: gs->ev.event, sample: &gs->ev.sample); |
1415 | if (ret) { |
1416 | pr_err("Parse failed fetching guest event" ); |
1417 | return ret; |
1418 | } |
1419 | |
1420 | if (!gs->have_tc) { |
1421 | guest_session__get_tc(gs); |
1422 | gs->have_tc = true; |
1423 | } |
1424 | |
1425 | guest_session__convert_time(gs, guest_time: gs->ev.sample.time, host_time: &gs->ev.sample.time); |
1426 | |
1427 | return 0; |
1428 | } |
1429 | |
1430 | static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev, |
1431 | const struct perf_sample *sample) |
1432 | { |
1433 | struct evsel *evsel; |
1434 | void *array; |
1435 | int ret; |
1436 | |
1437 | evsel = evlist__id2evsel(evlist, id: sample->id); |
1438 | array = ev; |
1439 | |
1440 | if (!evsel) { |
1441 | pr_err("No evsel for id %" PRIu64"\n" , sample->id); |
1442 | return -EINVAL; |
1443 | } |
1444 | |
1445 | array += ev->header.size; |
1446 | ret = perf_event__synthesize_id_sample(array, type: evsel->core.attr.sample_type, sample); |
1447 | if (ret < 0) |
1448 | return ret; |
1449 | |
1450 | if (ret & 7) { |
1451 | pr_err("Bad id sample size %d\n" , ret); |
1452 | return -EINVAL; |
1453 | } |
1454 | |
1455 | ev->header.size += ret; |
1456 | |
1457 | return 0; |
1458 | } |
1459 | |
/*
 * Inject guest events with timestamps up to and including @timestamp into
 * the output.  Each event is rewritten in place: cpumode is switched to
 * the guest variant, the guest ID sample is stripped, and a host ID
 * sample (with the mapped host ID and host CPU) is appended.  An event
 * fetched but not yet due (gs->fetched) is kept for the next call.
 * Returns 0 on success or EOF, a negative error code otherwise.
 */
static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	int ret;

	/* Nothing to do until host__finished_init() has mapped the IDs */
	if (!gs->ready)
		return 0;

	while (1) {
		struct perf_sample *sample;
		struct guest_id *guest_id;
		union perf_event *ev;
		u16 id_hdr_size;
		u8 cpumode;
		u64 id;

		if (!gs->fetched) {
			ret = guest_session__fetch(gs);
			if (ret)
				return ret;
			gs->fetched = true;
		}

		ev = gs->ev.event;
		sample = &gs->ev.sample;

		if (!ev->header.size)
			return 0; /* EOF */

		/* Keep the event buffered until its timestamp is reached */
		if (sample->time > timestamp)
			return 0;

		/* Change cpumode to guest */
		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
		if (cpumode & PERF_RECORD_MISC_USER)
			cpumode = PERF_RECORD_MISC_GUEST_USER;
		else
			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
		ev->header.misc |= cpumode;

		/* Zero ID means a zero'd initial ID sample; use the default */
		id = sample->id;
		if (!id) {
			id = gs->dflt_id;
			id_hdr_size = gs->dflt_id_hdr_size;
		} else {
			struct evsel *evsel = evlist__id2evsel(evlist: gs->session->evlist, id);

			id_hdr_size = evsel__id_hdr_size(evsel);
		}

		/* Sizes must stay 8-byte aligned at every step */
		if (id_hdr_size & 7) {
			pr_err("Bad id_hdr_size %u\n" , id_hdr_size);
			return -EINVAL;
		}

		if (ev->header.size & 7) {
			pr_err("Bad event size %u\n" , ev->header.size);
			return -EINVAL;
		}

		/* Remove guest id sample */
		ev->header.size -= id_hdr_size;

		if (ev->header.size & 7) {
			pr_err("Bad raw event size %u\n" , ev->header.size);
			return -EINVAL;
		}

		guest_id = guest_session__lookup_id(gs, id);
		if (!guest_id) {
			pr_err("Guest event with unknown id %llu\n" ,
			       (unsigned long long)id);
			return -EINVAL;
		}

		/* Change to host ID to avoid conflicting ID values */
		sample->id = guest_id->host_id;
		sample->stream_id = guest_id->host_id;

		if (sample->cpu != (u32)-1) {
			if (sample->cpu >= gs->vcpu_cnt) {
				pr_err("Guest event with unknown VCPU %u\n" ,
				       sample->cpu);
				return -EINVAL;
			}
			/* Change to host CPU instead of guest VCPU */
			sample->cpu = gs->vcpu[sample->cpu].cpu;
		}

		/* New id sample with new ID and CPU */
		ret = evlist__append_id_sample(evlist: inject->session->evlist, ev, sample);
		if (ret)
			return ret;

		if (ev->header.size & 7) {
			pr_err("Bad new event size %u\n" , ev->header.size);
			return -EINVAL;
		}

		gs->fetched = false;

		ret = output_bytes(inject, buf: ev, sz: ev->header.size);
		if (ret)
			return ret;
	}
}
1567 | |
1568 | static int guest_session__flush_events(struct guest_session *gs) |
1569 | { |
1570 | return guest_session__inject_events(gs, timestamp: -1); |
1571 | } |
1572 | |
1573 | static int host__repipe(struct perf_tool *tool, |
1574 | union perf_event *event, |
1575 | struct perf_sample *sample, |
1576 | struct machine *machine) |
1577 | { |
1578 | struct perf_inject *inject = container_of(tool, struct perf_inject, tool); |
1579 | int ret; |
1580 | |
1581 | ret = guest_session__inject_events(gs: &inject->guest_session, timestamp: sample->time); |
1582 | if (ret) |
1583 | return ret; |
1584 | |
1585 | return perf_event__repipe(tool, event, sample, machine); |
1586 | } |
1587 | |
/*
 * Called once the host header has been processed: discover the guest VCPU
 * threads, allocate host sample IDs for the guest events and map them,
 * synthesize guest attr/id_index/build-ID information into the host
 * stream, and start injecting guest events from time zero.
 */
static int host__finished_init(struct perf_session *session, union perf_event *event)
{
	struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
	struct guest_session *gs = &inject->guest_session;
	int ret;

	/*
	 * Peek through host COMM events to find QEMU threads and the VCPU they
	 * are running.
	 */
	ret = host_peek_vm_comms(session, gs);
	if (ret)
		return ret;

	if (!gs->vcpu_cnt) {
		pr_err("No VCPU threads found for pid %u\n" , gs->machine_pid);
		return -EINVAL;
	}

	/*
	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
	 */
	gs->highest_id = evlist__find_highest_id(evlist: session->evlist);
	ret = guest_session__map_ids(gs, host_evlist: session->evlist);
	if (ret)
		return ret;

	ret = guest_session__add_attrs(gs);
	if (ret)
		return ret;

	/* Cover only the evsels added for the guest session */
	ret = synthesize_id_index(inject, new_cnt: gs->session->evlist->core.nr_entries);
	if (ret) {
		pr_err("Failed to synthesize id_index\n" );
		return ret;
	}

	ret = guest_session__add_build_ids(gs);
	if (ret) {
		pr_err("Failed to add guest build IDs\n" );
		return ret;
	}

	gs->ready = true;

	/* Inject guest events that precede the first host event */
	ret = guest_session__inject_events(gs, timestamp: 0);
	if (ret)
		return ret;

	return perf_event__repipe_op2_synth(session, event);
}
1639 | |
1640 | /* |
1641 | * Obey finished-round ordering. The FINISHED_ROUND event is first processed |
1642 | * which flushes host events to file up until the last flush time. Then inject |
1643 | * guest events up to the same time. Finally write out the FINISHED_ROUND event |
1644 | * itself. |
1645 | */ |
1646 | static int host__finished_round(struct perf_tool *tool, |
1647 | union perf_event *event, |
1648 | struct ordered_events *oe) |
1649 | { |
1650 | struct perf_inject *inject = container_of(tool, struct perf_inject, tool); |
1651 | int ret = perf_event__process_finished_round(tool, event, oe); |
1652 | u64 timestamp = ordered_events__last_flush_time(oe); |
1653 | |
1654 | if (ret) |
1655 | return ret; |
1656 | |
1657 | ret = guest_session__inject_events(gs: &inject->guest_session, timestamp); |
1658 | if (ret) |
1659 | return ret; |
1660 | |
1661 | return perf_event__repipe_oe_synth(tool, event, oe); |
1662 | } |
1663 | |
1664 | static int host__context_switch(struct perf_tool *tool, |
1665 | union perf_event *event, |
1666 | struct perf_sample *sample, |
1667 | struct machine *machine) |
1668 | { |
1669 | struct perf_inject *inject = container_of(tool, struct perf_inject, tool); |
1670 | bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; |
1671 | struct guest_session *gs = &inject->guest_session; |
1672 | u32 pid = event->context_switch.next_prev_pid; |
1673 | u32 tid = event->context_switch.next_prev_tid; |
1674 | struct guest_tid *guest_tid; |
1675 | u32 vcpu; |
1676 | |
1677 | if (out || pid != gs->machine_pid) |
1678 | goto out; |
1679 | |
1680 | guest_tid = guest_session__lookup_tid(gs, tid); |
1681 | if (!guest_tid) |
1682 | goto out; |
1683 | |
1684 | if (sample->cpu == (u32)-1) { |
1685 | pr_err("Switch event does not have CPU\n" ); |
1686 | return -EINVAL; |
1687 | } |
1688 | |
1689 | vcpu = guest_tid->vcpu; |
1690 | if (vcpu >= gs->vcpu_cnt) |
1691 | return -EINVAL; |
1692 | |
1693 | /* Guest is switching in, record which CPU the VCPU is now running on */ |
1694 | gs->vcpu[vcpu].cpu = sample->cpu; |
1695 | out: |
1696 | return host__repipe(tool, event, sample, machine); |
1697 | } |
1698 | |
/* SIGINT handler: ask the session processing loop to stop cleanly. */
static void sig_handler(int sig __maybe_unused)
{
	session_done = 1;
}
1703 | |
1704 | static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg) |
1705 | { |
1706 | struct perf_event_attr *attr = &evsel->core.attr; |
1707 | const char *name = evsel__name(evsel); |
1708 | |
1709 | if (!(attr->sample_type & sample_type)) { |
1710 | pr_err("Samples for %s event do not have %s attribute set." , |
1711 | name, sample_msg); |
1712 | return -EINVAL; |
1713 | } |
1714 | |
1715 | return 0; |
1716 | } |
1717 | |
/* Sample handler that discards every sample (used by --strip). */
static int drop_sample(struct perf_tool *tool __maybe_unused,
		       union perf_event *event __maybe_unused,
		       struct perf_sample *sample __maybe_unused,
		       struct evsel *evsel __maybe_unused,
		       struct machine *machine __maybe_unused)
{
	return 0;
}
1726 | |
1727 | static void strip_init(struct perf_inject *inject) |
1728 | { |
1729 | struct evlist *evlist = inject->session->evlist; |
1730 | struct evsel *evsel; |
1731 | |
1732 | inject->tool.context_switch = perf_event__drop; |
1733 | |
1734 | evlist__for_each_entry(evlist, evsel) |
1735 | evsel->handler = drop_sample; |
1736 | } |
1737 | |
1738 | static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset) |
1739 | { |
1740 | struct perf_inject *inject = opt->value; |
1741 | const char *args; |
1742 | char *dry_run; |
1743 | |
1744 | if (unset) |
1745 | return 0; |
1746 | |
1747 | inject->itrace_synth_opts.set = true; |
1748 | inject->itrace_synth_opts.vm_time_correlation = true; |
1749 | inject->in_place_update = true; |
1750 | |
1751 | if (!str) |
1752 | return 0; |
1753 | |
1754 | dry_run = skip_spaces(str); |
1755 | if (!strncmp(dry_run, "dry-run" , strlen("dry-run" ))) { |
1756 | inject->itrace_synth_opts.vm_tm_corr_dry_run = true; |
1757 | inject->in_place_update_dry_run = true; |
1758 | args = dry_run + strlen("dry-run" ); |
1759 | } else { |
1760 | args = str; |
1761 | } |
1762 | |
1763 | inject->itrace_synth_opts.vm_tm_corr_args = strdup(args); |
1764 | |
1765 | return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; |
1766 | } |
1767 | |
/*
 * Parse --guest-data="<file>,<machine PID>[,<time offset>[,<time scale>]]"
 * into inject->guest_session.  Returns 0 on success, -1 on malformed
 * arguments, -ENOMEM on allocation failure.
 */
static int parse_guest_data(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	struct guest_session *gs = &inject->guest_session;
	char *tok;
	char *s;

	if (unset)
		return 0;

	if (!str)
		goto bad_args;

	/*
	 * NOTE(review): s is carved up by strsep(); gs->perf_data_file ends
	 * up pointing at the start of the allocation and is freed in
	 * guest_session__exit() — confirm no leak on the bad_args paths.
	 */
	s = strdup(str);
	if (!s)
		return -ENOMEM;

	gs->perf_data_file = strsep(&s, "," );
	if (!gs->perf_data_file)
		goto bad_args;

	/* A kcore_dir in the guest data forces directory-style output */
	gs->copy_kcore_dir = has_kcore_dir(path: gs->perf_data_file);
	if (gs->copy_kcore_dir)
		inject->output.is_dir = true;

	tok = strsep(&s, "," );
	if (!tok)
		goto bad_args;
	gs->machine_pid = strtoul(tok, NULL, 0);
	if (!inject->guest_session.machine_pid)
		goto bad_args;

	/* Default scale when the optional fields are absent */
	gs->time_scale = 1;

	tok = strsep(&s, "," );
	if (!tok)
		goto out;
	gs->time_offset = strtoull(tok, NULL, 0);

	tok = strsep(&s, "," );
	if (!tok)
		goto out;
	gs->time_scale = strtod(tok, NULL);
	if (!gs->time_scale)
		goto bad_args;
out:
	return 0;

bad_args:
	pr_err("--guest-data option requires guest perf.data file name, "
	       "guest machine PID, and optionally guest timestamp offset, "
	       "and guest timestamp scale factor, separated by commas.\n" );
	return -1;
}
1822 | |
1823 | static int save_section_info_cb(struct perf_file_section *section, |
1824 | struct perf_header *ph __maybe_unused, |
1825 | int feat, int fd __maybe_unused, void *data) |
1826 | { |
1827 | struct perf_inject *inject = data; |
1828 | |
1829 | inject->secs[feat] = *section; |
1830 | return 0; |
1831 | } |
1832 | |
1833 | static int save_section_info(struct perf_inject *inject) |
1834 | { |
1835 | struct perf_header * = &inject->session->header; |
1836 | int fd = perf_data__fd(data: inject->session->data); |
1837 | |
1838 | return perf_header__process_sections(header, fd, data: inject, process: save_section_info_cb); |
1839 | } |
1840 | |
1841 | static bool keep_feat(int feat) |
1842 | { |
1843 | switch (feat) { |
1844 | /* Keep original information that describes the machine or software */ |
1845 | case HEADER_TRACING_DATA: |
1846 | case HEADER_HOSTNAME: |
1847 | case HEADER_OSRELEASE: |
1848 | case HEADER_VERSION: |
1849 | case HEADER_ARCH: |
1850 | case HEADER_NRCPUS: |
1851 | case HEADER_CPUDESC: |
1852 | case HEADER_CPUID: |
1853 | case HEADER_TOTAL_MEM: |
1854 | case HEADER_CPU_TOPOLOGY: |
1855 | case HEADER_NUMA_TOPOLOGY: |
1856 | case HEADER_PMU_MAPPINGS: |
1857 | case HEADER_CACHE: |
1858 | case HEADER_MEM_TOPOLOGY: |
1859 | case HEADER_CLOCKID: |
1860 | case HEADER_BPF_PROG_INFO: |
1861 | case HEADER_BPF_BTF: |
1862 | case HEADER_CPU_PMU_CAPS: |
1863 | case HEADER_CLOCK_DATA: |
1864 | case HEADER_HYBRID_TOPOLOGY: |
1865 | case HEADER_PMU_CAPS: |
1866 | return true; |
1867 | /* Information that can be updated */ |
1868 | case HEADER_BUILD_ID: |
1869 | case HEADER_CMDLINE: |
1870 | case HEADER_EVENT_DESC: |
1871 | case HEADER_BRANCH_STACK: |
1872 | case HEADER_GROUP_DESC: |
1873 | case HEADER_AUXTRACE: |
1874 | case HEADER_STAT: |
1875 | case HEADER_SAMPLE_TIME: |
1876 | case HEADER_DIR_FORMAT: |
1877 | case HEADER_COMPRESSED: |
1878 | default: |
1879 | return false; |
1880 | }; |
1881 | } |
1882 | |
1883 | static int read_file(int fd, u64 offs, void *buf, size_t sz) |
1884 | { |
1885 | ssize_t ret = preadn(fd, buf, sz, offs); |
1886 | |
1887 | if (ret < 0) |
1888 | return -errno; |
1889 | if ((size_t)ret != sz) |
1890 | return -EINVAL; |
1891 | return 0; |
1892 | } |
1893 | |
1894 | static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw) |
1895 | { |
1896 | int fd = perf_data__fd(data: inject->session->data); |
1897 | u64 offs = inject->secs[feat].offset; |
1898 | size_t sz = inject->secs[feat].size; |
1899 | void *buf = malloc(sz); |
1900 | int ret; |
1901 | |
1902 | if (!buf) |
1903 | return -ENOMEM; |
1904 | |
1905 | ret = read_file(fd, offs, buf, sz); |
1906 | if (ret) |
1907 | goto out_free; |
1908 | |
1909 | ret = fw->write(fw, buf, sz); |
1910 | out_free: |
1911 | free(buf); |
1912 | return ret; |
1913 | } |
1914 | |
/* Binds the feature-copier callback interface to its perf_inject instance. */
struct inject_fc {
	struct feat_copier fc;
	struct perf_inject *inject;
};
1919 | |
1920 | static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw) |
1921 | { |
1922 | struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc); |
1923 | struct perf_inject *inject = inj_fc->inject; |
1924 | int ret; |
1925 | |
1926 | if (!inject->secs[feat].offset || |
1927 | !keep_feat(feat)) |
1928 | return 0; |
1929 | |
1930 | ret = feat_copy(inject, feat, fw); |
1931 | if (ret < 0) |
1932 | return ret; |
1933 | |
1934 | return 1; /* Feature section copied */ |
1935 | } |
1936 | |
1937 | static int copy_kcore_dir(struct perf_inject *inject) |
1938 | { |
1939 | char *cmd; |
1940 | int ret; |
1941 | |
1942 | ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1" , |
1943 | inject->input_name, inject->output.path); |
1944 | if (ret < 0) |
1945 | return ret; |
1946 | pr_debug("%s\n" , cmd); |
1947 | ret = system(cmd); |
1948 | free(cmd); |
1949 | return ret; |
1950 | } |
1951 | |
1952 | static int guest_session__copy_kcore_dir(struct guest_session *gs) |
1953 | { |
1954 | struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); |
1955 | char *cmd; |
1956 | int ret; |
1957 | |
1958 | ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1" , |
1959 | gs->perf_data_file, inject->output.path, gs->machine_pid); |
1960 | if (ret < 0) |
1961 | return ret; |
1962 | pr_debug("%s\n" , cmd); |
1963 | ret = system(cmd); |
1964 | free(cmd); |
1965 | return ret; |
1966 | } |
1967 | |
1968 | static int output_fd(struct perf_inject *inject) |
1969 | { |
1970 | return inject->in_place_update ? -1 : perf_data__fd(data: &inject->output); |
1971 | } |
1972 | |
1973 | static int __cmd_inject(struct perf_inject *inject) |
1974 | { |
1975 | int ret = -EINVAL; |
1976 | struct guest_session *gs = &inject->guest_session; |
1977 | struct perf_session *session = inject->session; |
1978 | int fd = output_fd(inject); |
1979 | u64 output_data_offset; |
1980 | |
1981 | signal(SIGINT, sig_handler); |
1982 | |
1983 | if (inject->build_ids || inject->sched_stat || |
1984 | inject->itrace_synth_opts.set || inject->build_id_all) { |
1985 | inject->tool.mmap = perf_event__repipe_mmap; |
1986 | inject->tool.mmap2 = perf_event__repipe_mmap2; |
1987 | inject->tool.fork = perf_event__repipe_fork; |
1988 | #ifdef HAVE_LIBTRACEEVENT |
1989 | inject->tool.tracing_data = perf_event__repipe_tracing_data; |
1990 | #endif |
1991 | } |
1992 | |
1993 | output_data_offset = perf_session__data_offset(evlist: session->evlist); |
1994 | |
1995 | if (inject->build_id_all) { |
1996 | inject->tool.mmap = perf_event__repipe_buildid_mmap; |
1997 | inject->tool.mmap2 = perf_event__repipe_buildid_mmap2; |
1998 | } else if (inject->build_ids) { |
1999 | inject->tool.sample = perf_event__inject_buildid; |
2000 | } else if (inject->sched_stat) { |
2001 | struct evsel *evsel; |
2002 | |
2003 | evlist__for_each_entry(session->evlist, evsel) { |
2004 | const char *name = evsel__name(evsel); |
2005 | |
2006 | if (!strcmp(name, "sched:sched_switch" )) { |
2007 | if (evsel__check_stype(evsel, sample_type: PERF_SAMPLE_TID, sample_msg: "TID" )) |
2008 | return -EINVAL; |
2009 | |
2010 | evsel->handler = perf_inject__sched_switch; |
2011 | } else if (!strcmp(name, "sched:sched_process_exit" )) |
2012 | evsel->handler = perf_inject__sched_process_exit; |
2013 | #ifdef HAVE_LIBTRACEEVENT |
2014 | else if (!strncmp(name, "sched:sched_stat_" , 17)) |
2015 | evsel->handler = perf_inject__sched_stat; |
2016 | #endif |
2017 | } |
2018 | } else if (inject->itrace_synth_opts.vm_time_correlation) { |
2019 | session->itrace_synth_opts = &inject->itrace_synth_opts; |
2020 | memset(&inject->tool, 0, sizeof(inject->tool)); |
2021 | inject->tool.id_index = perf_event__process_id_index; |
2022 | inject->tool.auxtrace_info = perf_event__process_auxtrace_info; |
2023 | inject->tool.auxtrace = perf_event__process_auxtrace; |
2024 | inject->tool.auxtrace_error = perf_event__process_auxtrace_error; |
2025 | inject->tool.ordered_events = true; |
2026 | inject->tool.ordering_requires_timestamps = true; |
2027 | } else if (inject->itrace_synth_opts.set) { |
2028 | session->itrace_synth_opts = &inject->itrace_synth_opts; |
2029 | inject->itrace_synth_opts.inject = true; |
2030 | inject->tool.comm = perf_event__repipe_comm; |
2031 | inject->tool.namespaces = perf_event__repipe_namespaces; |
2032 | inject->tool.exit = perf_event__repipe_exit; |
2033 | inject->tool.id_index = perf_event__process_id_index; |
2034 | inject->tool.auxtrace_info = perf_event__process_auxtrace_info; |
2035 | inject->tool.auxtrace = perf_event__process_auxtrace; |
2036 | inject->tool.aux = perf_event__drop_aux; |
2037 | inject->tool.itrace_start = perf_event__drop_aux; |
2038 | inject->tool.aux_output_hw_id = perf_event__drop_aux; |
2039 | inject->tool.ordered_events = true; |
2040 | inject->tool.ordering_requires_timestamps = true; |
2041 | /* Allow space in the header for new attributes */ |
2042 | output_data_offset = roundup(8192 + session->header.data_offset, 4096); |
2043 | if (inject->strip) |
2044 | strip_init(inject); |
2045 | } else if (gs->perf_data_file) { |
2046 | char *name = gs->perf_data_file; |
2047 | |
2048 | /* |
2049 | * Not strictly necessary, but keep these events in order wrt |
2050 | * guest events. |
2051 | */ |
2052 | inject->tool.mmap = host__repipe; |
2053 | inject->tool.mmap2 = host__repipe; |
2054 | inject->tool.comm = host__repipe; |
2055 | inject->tool.fork = host__repipe; |
2056 | inject->tool.exit = host__repipe; |
2057 | inject->tool.lost = host__repipe; |
2058 | inject->tool.context_switch = host__repipe; |
2059 | inject->tool.ksymbol = host__repipe; |
2060 | inject->tool.text_poke = host__repipe; |
2061 | /* |
2062 | * Once the host session has initialized, set up sample ID |
2063 | * mapping and feed in guest attrs, build IDs and initial |
2064 | * events. |
2065 | */ |
2066 | inject->tool.finished_init = host__finished_init; |
2067 | /* Obey finished round ordering */ |
2068 | inject->tool.finished_round = host__finished_round, |
2069 | /* Keep track of which CPU a VCPU is runnng on */ |
2070 | inject->tool.context_switch = host__context_switch; |
2071 | /* |
2072 | * Must order events to be able to obey finished round |
2073 | * ordering. |
2074 | */ |
2075 | inject->tool.ordered_events = true; |
2076 | inject->tool.ordering_requires_timestamps = true; |
2077 | /* Set up a separate session to process guest perf.data file */ |
2078 | ret = guest_session__start(gs, name, force: session->data->force); |
2079 | if (ret) { |
2080 | pr_err("Failed to process %s, error %d\n" , name, ret); |
2081 | return ret; |
2082 | } |
2083 | /* Allow space in the header for guest attributes */ |
2084 | output_data_offset += gs->session->header.data_offset; |
2085 | output_data_offset = roundup(output_data_offset, 4096); |
2086 | } |
2087 | |
2088 | if (!inject->itrace_synth_opts.set) |
2089 | auxtrace_index__free(head: &session->auxtrace_index); |
2090 | |
2091 | if (!inject->is_pipe && !inject->in_place_update) |
2092 | lseek(fd, output_data_offset, SEEK_SET); |
2093 | |
2094 | ret = perf_session__process_events(session); |
2095 | if (ret) |
2096 | return ret; |
2097 | |
2098 | if (gs->session) { |
2099 | /* |
2100 | * Remaining guest events have later timestamps. Flush them |
2101 | * out to file. |
2102 | */ |
2103 | ret = guest_session__flush_events(gs); |
2104 | if (ret) { |
2105 | pr_err("Failed to flush guest events\n" ); |
2106 | return ret; |
2107 | } |
2108 | } |
2109 | |
2110 | if (!inject->is_pipe && !inject->in_place_update) { |
2111 | struct inject_fc inj_fc = { |
2112 | .fc.copy = feat_copy_cb, |
2113 | .inject = inject, |
2114 | }; |
2115 | |
2116 | if (inject->build_ids) |
2117 | perf_header__set_feat(header: &session->header, |
2118 | feat: HEADER_BUILD_ID); |
2119 | /* |
2120 | * Keep all buildids when there is unprocessed AUX data because |
2121 | * it is not known which ones the AUX trace hits. |
2122 | */ |
2123 | if (perf_header__has_feat(header: &session->header, feat: HEADER_BUILD_ID) && |
2124 | inject->have_auxtrace && !inject->itrace_synth_opts.set) |
2125 | dsos__hit_all(session); |
2126 | /* |
2127 | * The AUX areas have been removed and replaced with |
2128 | * synthesized hardware events, so clear the feature flag. |
2129 | */ |
2130 | if (inject->itrace_synth_opts.set) { |
2131 | perf_header__clear_feat(header: &session->header, |
2132 | feat: HEADER_AUXTRACE); |
2133 | if (inject->itrace_synth_opts.last_branch || |
2134 | inject->itrace_synth_opts.add_last_branch) |
2135 | perf_header__set_feat(header: &session->header, |
2136 | feat: HEADER_BRANCH_STACK); |
2137 | } |
2138 | session->header.data_offset = output_data_offset; |
2139 | session->header.data_size = inject->bytes_written; |
2140 | perf_session__inject_header(session, evlist: session->evlist, fd, fc: &inj_fc.fc); |
2141 | |
2142 | if (inject->copy_kcore_dir) { |
2143 | ret = copy_kcore_dir(inject); |
2144 | if (ret) { |
2145 | pr_err("Failed to copy kcore\n" ); |
2146 | return ret; |
2147 | } |
2148 | } |
2149 | if (gs->copy_kcore_dir) { |
2150 | ret = guest_session__copy_kcore_dir(gs); |
2151 | if (ret) { |
2152 | pr_err("Failed to copy guest kcore\n" ); |
2153 | return ret; |
2154 | } |
2155 | } |
2156 | } |
2157 | |
2158 | return ret; |
2159 | } |
2160 | |
2161 | int cmd_inject(int argc, const char **argv) |
2162 | { |
2163 | struct perf_inject inject = { |
2164 | .tool = { |
2165 | .sample = perf_event__repipe_sample, |
2166 | .read = perf_event__repipe_sample, |
2167 | .mmap = perf_event__repipe, |
2168 | .mmap2 = perf_event__repipe, |
2169 | .comm = perf_event__repipe, |
2170 | .namespaces = perf_event__repipe, |
2171 | .cgroup = perf_event__repipe, |
2172 | .fork = perf_event__repipe, |
2173 | .exit = perf_event__repipe, |
2174 | .lost = perf_event__repipe, |
2175 | .lost_samples = perf_event__repipe, |
2176 | .aux = perf_event__repipe, |
2177 | .itrace_start = perf_event__repipe, |
2178 | .aux_output_hw_id = perf_event__repipe, |
2179 | .context_switch = perf_event__repipe, |
2180 | .throttle = perf_event__repipe, |
2181 | .unthrottle = perf_event__repipe, |
2182 | .ksymbol = perf_event__repipe, |
2183 | .bpf = perf_event__repipe, |
2184 | .text_poke = perf_event__repipe, |
2185 | .attr = perf_event__repipe_attr, |
2186 | .event_update = perf_event__repipe_event_update, |
2187 | .tracing_data = perf_event__repipe_op2_synth, |
2188 | .finished_round = perf_event__repipe_oe_synth, |
2189 | .build_id = perf_event__repipe_op2_synth, |
2190 | .id_index = perf_event__repipe_op2_synth, |
2191 | .auxtrace_info = perf_event__repipe_op2_synth, |
2192 | .auxtrace_error = perf_event__repipe_op2_synth, |
2193 | .time_conv = perf_event__repipe_op2_synth, |
2194 | .thread_map = perf_event__repipe_op2_synth, |
2195 | .cpu_map = perf_event__repipe_op2_synth, |
2196 | .stat_config = perf_event__repipe_op2_synth, |
2197 | .stat = perf_event__repipe_op2_synth, |
2198 | .stat_round = perf_event__repipe_op2_synth, |
2199 | .feature = perf_event__repipe_op2_synth, |
2200 | .finished_init = perf_event__repipe_op2_synth, |
2201 | .compressed = perf_event__repipe_op4_synth, |
2202 | .auxtrace = perf_event__repipe_auxtrace, |
2203 | }, |
2204 | .input_name = "-" , |
2205 | .samples = LIST_HEAD_INIT(inject.samples), |
2206 | .output = { |
2207 | .path = "-" , |
2208 | .mode = PERF_DATA_MODE_WRITE, |
2209 | .use_stdio = true, |
2210 | }, |
2211 | }; |
2212 | struct perf_data data = { |
2213 | .mode = PERF_DATA_MODE_READ, |
2214 | .use_stdio = true, |
2215 | }; |
2216 | int ret; |
2217 | bool repipe = true; |
2218 | const char *known_build_ids = NULL; |
2219 | |
2220 | struct option options[] = { |
2221 | OPT_BOOLEAN('b', "build-ids" , &inject.build_ids, |
2222 | "Inject build-ids into the output stream" ), |
2223 | OPT_BOOLEAN(0, "buildid-all" , &inject.build_id_all, |
2224 | "Inject build-ids of all DSOs into the output stream" ), |
2225 | OPT_STRING(0, "known-build-ids" , &known_build_ids, |
2226 | "buildid path [,buildid path...]" , |
2227 | "build-ids to use for given paths" ), |
2228 | OPT_STRING('i', "input" , &inject.input_name, "file" , |
2229 | "input file name" ), |
2230 | OPT_STRING('o', "output" , &inject.output.path, "file" , |
2231 | "output file name" ), |
2232 | OPT_BOOLEAN('s', "sched-stat" , &inject.sched_stat, |
2233 | "Merge sched-stat and sched-switch for getting events " |
2234 | "where and how long tasks slept" ), |
2235 | #ifdef HAVE_JITDUMP |
2236 | OPT_BOOLEAN('j', "jit" , &inject.jit_mode, "merge jitdump files into perf.data file" ), |
2237 | #endif |
2238 | OPT_INCR('v', "verbose" , &verbose, |
2239 | "be more verbose (show build ids, etc)" ), |
2240 | OPT_STRING('k', "vmlinux" , &symbol_conf.vmlinux_name, |
2241 | "file" , "vmlinux pathname" ), |
2242 | OPT_BOOLEAN(0, "ignore-vmlinux" , &symbol_conf.ignore_vmlinux, |
2243 | "don't load vmlinux even if found" ), |
2244 | OPT_STRING(0, "kallsyms" , &symbol_conf.kallsyms_name, "file" , |
2245 | "kallsyms pathname" ), |
2246 | OPT_BOOLEAN('f', "force" , &data.force, "don't complain, do it" ), |
2247 | OPT_CALLBACK_OPTARG(0, "itrace" , &inject.itrace_synth_opts, |
2248 | NULL, "opts" , "Instruction Tracing options\n" |
2249 | ITRACE_HELP, |
2250 | itrace_parse_synth_opts), |
2251 | OPT_BOOLEAN(0, "strip" , &inject.strip, |
2252 | "strip non-synthesized events (use with --itrace)" ), |
2253 | OPT_CALLBACK_OPTARG(0, "vm-time-correlation" , &inject, NULL, "opts" , |
2254 | "correlate time between VM guests and the host" , |
2255 | parse_vm_time_correlation), |
2256 | OPT_CALLBACK_OPTARG(0, "guest-data" , &inject, NULL, "opts" , |
2257 | "inject events from a guest perf.data file" , |
2258 | parse_guest_data), |
2259 | OPT_STRING(0, "guestmount" , &symbol_conf.guestmount, "directory" , |
2260 | "guest mount directory under which every guest os" |
2261 | " instance has a subdir" ), |
2262 | OPT_END() |
2263 | }; |
2264 | const char * const inject_usage[] = { |
2265 | "perf inject [<options>]" , |
2266 | NULL |
2267 | }; |
2268 | #ifndef HAVE_JITDUMP |
2269 | set_option_nobuild(options, 'j', "jit" , "NO_LIBELF=1" , true); |
2270 | #endif |
2271 | argc = parse_options(argc, argv, options, inject_usage, 0); |
2272 | |
2273 | /* |
2274 | * Any (unrecognized) arguments left? |
2275 | */ |
2276 | if (argc) |
2277 | usage_with_options(inject_usage, options); |
2278 | |
2279 | if (inject.strip && !inject.itrace_synth_opts.set) { |
2280 | pr_err("--strip option requires --itrace option\n" ); |
2281 | return -1; |
2282 | } |
2283 | |
2284 | if (symbol__validate_sym_arguments()) |
2285 | return -1; |
2286 | |
2287 | if (inject.in_place_update) { |
2288 | if (!strcmp(inject.input_name, "-" )) { |
2289 | pr_err("Input file name required for in-place updating\n" ); |
2290 | return -1; |
2291 | } |
2292 | if (strcmp(inject.output.path, "-" )) { |
2293 | pr_err("Output file name must not be specified for in-place updating\n" ); |
2294 | return -1; |
2295 | } |
2296 | if (!data.force && !inject.in_place_update_dry_run) { |
2297 | pr_err("The input file would be updated in place, " |
2298 | "the --force option is required.\n" ); |
2299 | return -1; |
2300 | } |
2301 | if (!inject.in_place_update_dry_run) |
2302 | data.in_place_update = true; |
2303 | } else { |
2304 | if (strcmp(inject.output.path, "-" ) && !inject.strip && |
2305 | has_kcore_dir(path: inject.input_name)) { |
2306 | inject.output.is_dir = true; |
2307 | inject.copy_kcore_dir = true; |
2308 | } |
2309 | if (perf_data__open(data: &inject.output)) { |
2310 | perror("failed to create output file" ); |
2311 | return -1; |
2312 | } |
2313 | } |
2314 | |
2315 | data.path = inject.input_name; |
2316 | if (!strcmp(inject.input_name, "-" ) || inject.output.is_pipe) { |
2317 | inject.is_pipe = true; |
2318 | /* |
2319 | * Do not repipe header when input is a regular file |
2320 | * since either it can rewrite the header at the end |
2321 | * or write a new pipe header. |
2322 | */ |
2323 | if (strcmp(inject.input_name, "-" )) |
2324 | repipe = false; |
2325 | } |
2326 | |
2327 | inject.session = __perf_session__new(data: &data, repipe, |
2328 | repipe_fd: output_fd(inject: &inject), |
2329 | tool: &inject.tool); |
2330 | if (IS_ERR(ptr: inject.session)) { |
2331 | ret = PTR_ERR(ptr: inject.session); |
2332 | goto out_close_output; |
2333 | } |
2334 | |
2335 | if (zstd_init(&(inject.session->zstd_data), 0) < 0) |
2336 | pr_warning("Decompression initialization failed.\n" ); |
2337 | |
2338 | /* Save original section info before feature bits change */ |
2339 | ret = save_section_info(inject: &inject); |
2340 | if (ret) |
2341 | goto out_delete; |
2342 | |
2343 | if (!data.is_pipe && inject.output.is_pipe) { |
2344 | ret = perf_header__write_pipe(fd: perf_data__fd(data: &inject.output)); |
2345 | if (ret < 0) { |
2346 | pr_err("Couldn't write a new pipe header.\n" ); |
2347 | goto out_delete; |
2348 | } |
2349 | |
2350 | ret = perf_event__synthesize_for_pipe(tool: &inject.tool, |
2351 | session: inject.session, |
2352 | data: &inject.output, |
2353 | process: perf_event__repipe); |
2354 | if (ret < 0) |
2355 | goto out_delete; |
2356 | } |
2357 | |
2358 | if (inject.build_ids && !inject.build_id_all) { |
2359 | /* |
2360 | * to make sure the mmap records are ordered correctly |
2361 | * and so that the correct especially due to jitted code |
2362 | * mmaps. We cannot generate the buildid hit list and |
2363 | * inject the jit mmaps at the same time for now. |
2364 | */ |
2365 | inject.tool.ordered_events = true; |
2366 | inject.tool.ordering_requires_timestamps = true; |
2367 | if (known_build_ids != NULL) { |
2368 | inject.known_build_ids = |
2369 | perf_inject__parse_known_build_ids(known_build_ids_string: known_build_ids); |
2370 | |
2371 | if (inject.known_build_ids == NULL) { |
2372 | pr_err("Couldn't parse known build ids.\n" ); |
2373 | goto out_delete; |
2374 | } |
2375 | } |
2376 | } |
2377 | |
2378 | if (inject.sched_stat) { |
2379 | inject.tool.ordered_events = true; |
2380 | } |
2381 | |
2382 | #ifdef HAVE_JITDUMP |
2383 | if (inject.jit_mode) { |
2384 | inject.tool.mmap2 = perf_event__jit_repipe_mmap2; |
2385 | inject.tool.mmap = perf_event__jit_repipe_mmap; |
2386 | inject.tool.ordered_events = true; |
2387 | inject.tool.ordering_requires_timestamps = true; |
2388 | /* |
2389 | * JIT MMAP injection injects all MMAP events in one go, so it |
2390 | * does not obey finished_round semantics. |
2391 | */ |
2392 | inject.tool.finished_round = perf_event__drop_oe; |
2393 | } |
2394 | #endif |
2395 | ret = symbol__init(env: &inject.session->header.env); |
2396 | if (ret < 0) |
2397 | goto out_delete; |
2398 | |
2399 | ret = __cmd_inject(inject: &inject); |
2400 | |
2401 | guest_session__exit(gs: &inject.guest_session); |
2402 | |
2403 | out_delete: |
2404 | strlist__delete(slist: inject.known_build_ids); |
2405 | zstd_fini(&(inject.session->zstd_data)); |
2406 | perf_session__delete(session: inject.session); |
2407 | out_close_output: |
2408 | if (!inject.in_place_update) |
2409 | perf_data__close(data: &inject.output); |
2410 | free(inject.itrace_synth_opts.vm_tm_corr_args); |
2411 | free(inject.event_copy); |
2412 | free(inject.guest_session.ev.event_buf); |
2413 | return ret; |
2414 | } |
2415 | |