1// SPDX-License-Identifier: GPL-2.0
2/*
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
8 */
9#include "builtin.h"
10
11#include "util/build-id.h"
12#include <subcmd/parse-options.h>
13#include <internal/xyarray.h>
14#include "util/parse-events.h"
15#include "util/config.h"
16
17#include "util/callchain.h"
18#include "util/cgroup.h"
19#include "util/header.h"
20#include "util/event.h"
21#include "util/evlist.h"
22#include "util/evsel.h"
23#include "util/debug.h"
24#include "util/mmap.h"
25#include "util/mutex.h"
26#include "util/target.h"
27#include "util/session.h"
28#include "util/tool.h"
29#include "util/symbol.h"
30#include "util/record.h"
31#include "util/cpumap.h"
32#include "util/thread_map.h"
33#include "util/data.h"
34#include "util/perf_regs.h"
35#include "util/auxtrace.h"
36#include "util/tsc.h"
37#include "util/parse-branch-options.h"
38#include "util/parse-regs-options.h"
39#include "util/perf_api_probe.h"
40#include "util/trigger.h"
41#include "util/perf-hooks.h"
42#include "util/cpu-set-sched.h"
43#include "util/synthetic-events.h"
44#include "util/time-utils.h"
45#include "util/units.h"
46#include "util/bpf-event.h"
47#include "util/util.h"
48#include "util/pfm.h"
49#include "util/pmu.h"
50#include "util/pmus.h"
51#include "util/clockid.h"
52#include "util/off_cpu.h"
53#include "util/bpf-filter.h"
54#include "asm/bug.h"
55#include "perf.h"
56#include "cputopo.h"
57
58#include <errno.h>
59#include <inttypes.h>
60#include <locale.h>
61#include <poll.h>
62#include <pthread.h>
63#include <unistd.h>
64#ifndef HAVE_GETTID
65#include <syscall.h>
66#endif
67#include <sched.h>
68#include <signal.h>
69#ifdef HAVE_EVENTFD_SUPPORT
70#include <sys/eventfd.h>
71#endif
72#include <sys/mman.h>
73#include <sys/wait.h>
74#include <sys/types.h>
75#include <sys/stat.h>
76#include <fcntl.h>
77#include <linux/err.h>
78#include <linux/string.h>
79#include <linux/time64.h>
80#include <linux/zalloc.h>
81#include <linux/bitmap.h>
82#include <sys/time.h>
83
84struct switch_output {
85 bool enabled;
86 bool signal;
87 unsigned long size;
88 unsigned long time;
89 const char *str;
90 bool set;
91 char **filenames;
92 int num_files;
93 int cur_file;
94};
95
96struct thread_mask {
97 struct mmap_cpu_mask maps;
98 struct mmap_cpu_mask affinity;
99};
100
101struct record_thread {
102 pid_t tid;
103 struct thread_mask *mask;
104 struct {
105 int msg[2];
106 int ack[2];
107 } pipes;
108 struct fdarray pollfd;
109 int ctlfd_pos;
110 int nr_mmaps;
111 struct mmap **maps;
112 struct mmap **overwrite_maps;
113 struct record *rec;
114 unsigned long long samples;
115 unsigned long waking;
116 u64 bytes_written;
117 u64 bytes_transferred;
118 u64 bytes_compressed;
119};
120
121static __thread struct record_thread *thread;
122
123enum thread_msg {
124 THREAD_MSG__UNDEFINED = 0,
125 THREAD_MSG__READY,
126 THREAD_MSG__MAX,
127};
128
129static const char *thread_msg_tags[THREAD_MSG__MAX] = {
130 "UNDEFINED", "READY"
131};
132
133enum thread_spec {
134 THREAD_SPEC__UNDEFINED = 0,
135 THREAD_SPEC__CPU,
136 THREAD_SPEC__CORE,
137 THREAD_SPEC__PACKAGE,
138 THREAD_SPEC__NUMA,
139 THREAD_SPEC__USER,
140 THREAD_SPEC__MAX,
141};
142
143static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
144 "undefined", "cpu", "core", "package", "numa", "user"
145};
146
147struct pollfd_index_map {
148 int evlist_pollfd_index;
149 int thread_pollfd_index;
150};
151
152struct record {
153 struct perf_tool tool;
154 struct record_opts opts;
155 u64 bytes_written;
156 u64 thread_bytes_written;
157 struct perf_data data;
158 struct auxtrace_record *itr;
159 struct evlist *evlist;
160 struct perf_session *session;
161 struct evlist *sb_evlist;
162 pthread_t thread_id;
163 int realtime_prio;
164 bool switch_output_event_set;
165 bool no_buildid;
166 bool no_buildid_set;
167 bool no_buildid_cache;
168 bool no_buildid_cache_set;
169 bool buildid_all;
170 bool buildid_mmap;
171 bool timestamp_filename;
172 bool timestamp_boundary;
173 bool off_cpu;
174 struct switch_output switch_output;
175 unsigned long long samples;
176 unsigned long output_max_size; /* = 0: unlimited */
177 struct perf_debuginfod debuginfod;
178 int nr_threads;
179 struct thread_mask *thread_masks;
180 struct record_thread *thread_data;
181 struct pollfd_index_map *index_map;
182 size_t index_map_sz;
183 size_t index_map_cnt;
184};
185
186static volatile int done;
187
188static volatile int auxtrace_record__snapshot_started;
189static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
190static DEFINE_TRIGGER(switch_output_trigger);
191
192static const char *affinity_tags[PERF_AFFINITY_MAX] = {
193 "SYS", "NODE", "CPU"
194};
195
196#ifndef HAVE_GETTID
197static inline pid_t gettid(void)
198{
199 return (pid_t)syscall(__NR_gettid);
200}
201#endif
202
203static int record__threads_enabled(struct record *rec)
204{
205 return rec->opts.threads_spec;
206}
207
208static bool switch_output_signal(struct record *rec)
209{
210 return rec->switch_output.signal &&
211 trigger_is_ready(t: &switch_output_trigger);
212}
213
214static bool switch_output_size(struct record *rec)
215{
216 return rec->switch_output.size &&
217 trigger_is_ready(t: &switch_output_trigger) &&
218 (rec->bytes_written >= rec->switch_output.size);
219}
220
221static bool switch_output_time(struct record *rec)
222{
223 return rec->switch_output.time &&
224 trigger_is_ready(t: &switch_output_trigger);
225}
226
227static u64 record__bytes_written(struct record *rec)
228{
229 return rec->bytes_written + rec->thread_bytes_written;
230}
231
232static bool record__output_max_size_exceeded(struct record *rec)
233{
234 return rec->output_max_size &&
235 (record__bytes_written(rec) >= rec->output_max_size);
236}
237
238static int record__write(struct record *rec, struct mmap *map __maybe_unused,
239 void *bf, size_t size)
240{
241 struct perf_data_file *file = &rec->session->data->file;
242
243 if (map && map->file)
244 file = map->file;
245
246 if (perf_data_file__write(file, buf: bf, size) < 0) {
247 pr_err("failed to write perf data, error: %m\n");
248 return -1;
249 }
250
251 if (map && map->file) {
252 thread->bytes_written += size;
253 rec->thread_bytes_written += size;
254 } else {
255 rec->bytes_written += size;
256 }
257
258 if (record__output_max_size_exceeded(rec) && !done) {
259 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
260 " stopping session ]\n",
261 record__bytes_written(rec) >> 10);
262 done = 1;
263 }
264
265 if (switch_output_size(rec))
266 trigger_hit(t: &switch_output_trigger);
267
268 return 0;
269}
270
271static int record__aio_enabled(struct record *rec);
272static int record__comp_enabled(struct record *rec);
273static size_t zstd_compress(struct perf_session *session, struct mmap *map,
274 void *dst, size_t dst_size, void *src, size_t src_size);
275
276#ifdef HAVE_AIO_SUPPORT
277static int record__aio_write(struct aiocb *cblock, int trace_fd,
278 void *buf, size_t size, off_t off)
279{
280 int rc;
281
282 cblock->aio_fildes = trace_fd;
283 cblock->aio_buf = buf;
284 cblock->aio_nbytes = size;
285 cblock->aio_offset = off;
286 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
287
288 do {
289 rc = aio_write(cblock);
290 if (rc == 0) {
291 break;
292 } else if (errno != EAGAIN) {
293 cblock->aio_fildes = -1;
294 pr_err("failed to queue perf data, error: %m\n");
295 break;
296 }
297 } while (1);
298
299 return rc;
300}
301
302static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
303{
304 void *rem_buf;
305 off_t rem_off;
306 size_t rem_size;
307 int rc, aio_errno;
308 ssize_t aio_ret, written;
309
310 aio_errno = aio_error(cblock);
311 if (aio_errno == EINPROGRESS)
312 return 0;
313
314 written = aio_ret = aio_return(cblock);
315 if (aio_ret < 0) {
316 if (aio_errno != EINTR)
317 pr_err("failed to write perf data, error: %m\n");
318 written = 0;
319 }
320
321 rem_size = cblock->aio_nbytes - written;
322
323 if (rem_size == 0) {
324 cblock->aio_fildes = -1;
325 /*
326 * md->refcount is incremented in record__aio_pushfn() for
327 * every aio write request started in record__aio_push() so
328 * decrement it because the request is now complete.
329 */
330 perf_mmap__put(&md->core);
331 rc = 1;
332 } else {
333 /*
334 * aio write request may require restart with the
335 * reminder if the kernel didn't write whole
336 * chunk at once.
337 */
338 rem_off = cblock->aio_offset + written;
339 rem_buf = (void *)(cblock->aio_buf + written);
340 record__aio_write(cblock, cblock->aio_fildes,
341 rem_buf, rem_size, rem_off);
342 rc = 0;
343 }
344
345 return rc;
346}
347
348static int record__aio_sync(struct mmap *md, bool sync_all)
349{
350 struct aiocb **aiocb = md->aio.aiocb;
351 struct aiocb *cblocks = md->aio.cblocks;
352 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
353 int i, do_suspend;
354
355 do {
356 do_suspend = 0;
357 for (i = 0; i < md->aio.nr_cblocks; ++i) {
358 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
359 if (sync_all)
360 aiocb[i] = NULL;
361 else
362 return i;
363 } else {
364 /*
365 * Started aio write is not complete yet
366 * so it has to be waited before the
367 * next allocation.
368 */
369 aiocb[i] = &cblocks[i];
370 do_suspend = 1;
371 }
372 }
373 if (!do_suspend)
374 return -1;
375
376 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
377 if (!(errno == EAGAIN || errno == EINTR))
378 pr_err("failed to sync perf data, error: %m\n");
379 }
380 } while (1);
381}
382
383struct record_aio {
384 struct record *rec;
385 void *data;
386 size_t size;
387};
388
389static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
390{
391 struct record_aio *aio = to;
392
393 /*
394 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
395 * to release space in the kernel buffer as fast as possible, calling
396 * perf_mmap__consume() from perf_mmap__push() function.
397 *
398 * That lets the kernel to proceed with storing more profiling data into
399 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
400 *
401 * Coping can be done in two steps in case the chunk of profiling data
402 * crosses the upper bound of the kernel buffer. In this case we first move
403 * part of data from map->start till the upper bound and then the reminder
404 * from the beginning of the kernel buffer till the end of the data chunk.
405 */
406
407 if (record__comp_enabled(aio->rec)) {
408 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
409 mmap__mmap_len(map) - aio->size,
410 buf, size);
411 } else {
412 memcpy(aio->data + aio->size, buf, size);
413 }
414
415 if (!aio->size) {
416 /*
417 * Increment map->refcount to guard map->aio.data[] buffer
418 * from premature deallocation because map object can be
419 * released earlier than aio write request started on
420 * map->aio.data[] buffer is complete.
421 *
422 * perf_mmap__put() is done at record__aio_complete()
423 * after started aio request completion or at record__aio_push()
424 * if the request failed to start.
425 */
426 perf_mmap__get(&map->core);
427 }
428
429 aio->size += size;
430
431 return size;
432}
433
434static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
435{
436 int ret, idx;
437 int trace_fd = rec->session->data->file.fd;
438 struct record_aio aio = { .rec = rec, .size = 0 };
439
440 /*
441 * Call record__aio_sync() to wait till map->aio.data[] buffer
442 * becomes available after previous aio write operation.
443 */
444
445 idx = record__aio_sync(map, false);
446 aio.data = map->aio.data[idx];
447 ret = perf_mmap__push(map, &aio, record__aio_pushfn);
448 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
449 return ret;
450
451 rec->samples++;
452 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
453 if (!ret) {
454 *off += aio.size;
455 rec->bytes_written += aio.size;
456 if (switch_output_size(rec))
457 trigger_hit(&switch_output_trigger);
458 } else {
459 /*
460 * Decrement map->refcount incremented in record__aio_pushfn()
461 * back if record__aio_write() operation failed to start, otherwise
462 * map->refcount is decremented in record__aio_complete() after
463 * aio write operation finishes successfully.
464 */
465 perf_mmap__put(&map->core);
466 }
467
468 return ret;
469}
470
471static off_t record__aio_get_pos(int trace_fd)
472{
473 return lseek(trace_fd, 0, SEEK_CUR);
474}
475
476static void record__aio_set_pos(int trace_fd, off_t pos)
477{
478 lseek(trace_fd, pos, SEEK_SET);
479}
480
481static void record__aio_mmap_read_sync(struct record *rec)
482{
483 int i;
484 struct evlist *evlist = rec->evlist;
485 struct mmap *maps = evlist->mmap;
486
487 if (!record__aio_enabled(rec))
488 return;
489
490 for (i = 0; i < evlist->core.nr_mmaps; i++) {
491 struct mmap *map = &maps[i];
492
493 if (map->core.base)
494 record__aio_sync(map, true);
495 }
496}
497
498static int nr_cblocks_default = 1;
499static int nr_cblocks_max = 4;
500
501static int record__aio_parse(const struct option *opt,
502 const char *str,
503 int unset)
504{
505 struct record_opts *opts = (struct record_opts *)opt->value;
506
507 if (unset) {
508 opts->nr_cblocks = 0;
509 } else {
510 if (str)
511 opts->nr_cblocks = strtol(str, NULL, 0);
512 if (!opts->nr_cblocks)
513 opts->nr_cblocks = nr_cblocks_default;
514 }
515
516 return 0;
517}
518#else /* HAVE_AIO_SUPPORT */
519static int nr_cblocks_max = 0;
520
521static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
522 off_t *off __maybe_unused)
523{
524 return -1;
525}
526
527static off_t record__aio_get_pos(int trace_fd __maybe_unused)
528{
529 return -1;
530}
531
532static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
533{
534}
535
536static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
537{
538}
539#endif
540
541static int record__aio_enabled(struct record *rec)
542{
543 return rec->opts.nr_cblocks > 0;
544}
545
546#define MMAP_FLUSH_DEFAULT 1
547static int record__mmap_flush_parse(const struct option *opt,
548 const char *str,
549 int unset)
550{
551 int flush_max;
552 struct record_opts *opts = (struct record_opts *)opt->value;
553 static struct parse_tag tags[] = {
554 { .tag = 'B', .mult = 1 },
555 { .tag = 'K', .mult = 1 << 10 },
556 { .tag = 'M', .mult = 1 << 20 },
557 { .tag = 'G', .mult = 1 << 30 },
558 { .tag = 0 },
559 };
560
561 if (unset)
562 return 0;
563
564 if (str) {
565 opts->mmap_flush = parse_tag_value(str, tags);
566 if (opts->mmap_flush == (int)-1)
567 opts->mmap_flush = strtol(str, NULL, 0);
568 }
569
570 if (!opts->mmap_flush)
571 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
572
573 flush_max = evlist__mmap_size(pages: opts->mmap_pages);
574 flush_max /= 4;
575 if (opts->mmap_flush > flush_max)
576 opts->mmap_flush = flush_max;
577
578 return 0;
579}
580
581#ifdef HAVE_ZSTD_SUPPORT
582static unsigned int comp_level_default = 1;
583
584static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
585{
586 struct record_opts *opts = opt->value;
587
588 if (unset) {
589 opts->comp_level = 0;
590 } else {
591 if (str)
592 opts->comp_level = strtol(str, NULL, 0);
593 if (!opts->comp_level)
594 opts->comp_level = comp_level_default;
595 }
596
597 return 0;
598}
599#endif
600static unsigned int comp_level_max = 22;
601
602static int record__comp_enabled(struct record *rec)
603{
604 return rec->opts.comp_level > 0;
605}
606
607static int process_synthesized_event(struct perf_tool *tool,
608 union perf_event *event,
609 struct perf_sample *sample __maybe_unused,
610 struct machine *machine __maybe_unused)
611{
612 struct record *rec = container_of(tool, struct record, tool);
613 return record__write(rec, NULL, bf: event, size: event->header.size);
614}
615
616static struct mutex synth_lock;
617
618static int process_locked_synthesized_event(struct perf_tool *tool,
619 union perf_event *event,
620 struct perf_sample *sample __maybe_unused,
621 struct machine *machine __maybe_unused)
622{
623 int ret;
624
625 mutex_lock(&synth_lock);
626 ret = process_synthesized_event(tool, event, sample, machine);
627 mutex_unlock(lock: &synth_lock);
628 return ret;
629}
630
631static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
632{
633 struct record *rec = to;
634
635 if (record__comp_enabled(rec)) {
636 size = zstd_compress(session: rec->session, map, dst: map->data, dst_size: mmap__mmap_len(map), src: bf, src_size: size);
637 bf = map->data;
638 }
639
640 thread->samples++;
641 return record__write(rec, map, bf, size);
642}
643
644static volatile sig_atomic_t signr = -1;
645static volatile sig_atomic_t child_finished;
646#ifdef HAVE_EVENTFD_SUPPORT
647static volatile sig_atomic_t done_fd = -1;
648#endif
649
650static void sig_handler(int sig)
651{
652 if (sig == SIGCHLD)
653 child_finished = 1;
654 else
655 signr = sig;
656
657 done = 1;
658#ifdef HAVE_EVENTFD_SUPPORT
659 if (done_fd >= 0) {
660 u64 tmp = 1;
661 int orig_errno = errno;
662
663 /*
664 * It is possible for this signal handler to run after done is
665 * checked in the main loop, but before the perf counter fds are
666 * polled. If this happens, the poll() will continue to wait
667 * even though done is set, and will only break out if either
668 * another signal is received, or the counters are ready for
669 * read. To ensure the poll() doesn't sleep when done is set,
670 * use an eventfd (done_fd) to wake up the poll().
671 */
672 if (write(done_fd, &tmp, sizeof(tmp)) < 0)
673 pr_err("failed to signal wakeup fd, error: %m\n");
674
675 errno = orig_errno;
676 }
677#endif // HAVE_EVENTFD_SUPPORT
678}
679
680static void sigsegv_handler(int sig)
681{
682 perf_hooks__recover();
683 sighandler_dump_stack(sig);
684}
685
686static void record__sig_exit(void)
687{
688 if (signr == -1)
689 return;
690
691 signal(signr, SIG_DFL);
692 raise(signr);
693}
694
695#ifdef HAVE_AUXTRACE_SUPPORT
696
697static int record__process_auxtrace(struct perf_tool *tool,
698 struct mmap *map,
699 union perf_event *event, void *data1,
700 size_t len1, void *data2, size_t len2)
701{
702 struct record *rec = container_of(tool, struct record, tool);
703 struct perf_data *data = &rec->data;
704 size_t padding;
705 u8 pad[8] = {0};
706
707 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
708 off_t file_offset;
709 int fd = perf_data__fd(data);
710 int err;
711
712 file_offset = lseek(fd, 0, SEEK_CUR);
713 if (file_offset == -1)
714 return -1;
715 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
716 event, file_offset);
717 if (err)
718 return err;
719 }
720
721 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
722 padding = (len1 + len2) & 7;
723 if (padding)
724 padding = 8 - padding;
725
726 record__write(rec, map, event, event->header.size);
727 record__write(rec, map, data1, len1);
728 if (len2)
729 record__write(rec, map, data2, len2);
730 record__write(rec, map, &pad, padding);
731
732 return 0;
733}
734
735static int record__auxtrace_mmap_read(struct record *rec,
736 struct mmap *map)
737{
738 int ret;
739
740 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
741 record__process_auxtrace);
742 if (ret < 0)
743 return ret;
744
745 if (ret)
746 rec->samples++;
747
748 return 0;
749}
750
751static int record__auxtrace_mmap_read_snapshot(struct record *rec,
752 struct mmap *map)
753{
754 int ret;
755
756 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
757 record__process_auxtrace,
758 rec->opts.auxtrace_snapshot_size);
759 if (ret < 0)
760 return ret;
761
762 if (ret)
763 rec->samples++;
764
765 return 0;
766}
767
768static int record__auxtrace_read_snapshot_all(struct record *rec)
769{
770 int i;
771 int rc = 0;
772
773 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
774 struct mmap *map = &rec->evlist->mmap[i];
775
776 if (!map->auxtrace_mmap.base)
777 continue;
778
779 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
780 rc = -1;
781 goto out;
782 }
783 }
784out:
785 return rc;
786}
787
788static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
789{
790 pr_debug("Recording AUX area tracing snapshot\n");
791 if (record__auxtrace_read_snapshot_all(rec) < 0) {
792 trigger_error(&auxtrace_snapshot_trigger);
793 } else {
794 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
795 trigger_error(&auxtrace_snapshot_trigger);
796 else
797 trigger_ready(&auxtrace_snapshot_trigger);
798 }
799}
800
801static int record__auxtrace_snapshot_exit(struct record *rec)
802{
803 if (trigger_is_error(&auxtrace_snapshot_trigger))
804 return 0;
805
806 if (!auxtrace_record__snapshot_started &&
807 auxtrace_record__snapshot_start(rec->itr))
808 return -1;
809
810 record__read_auxtrace_snapshot(rec, true);
811 if (trigger_is_error(&auxtrace_snapshot_trigger))
812 return -1;
813
814 return 0;
815}
816
817static int record__auxtrace_init(struct record *rec)
818{
819 int err;
820
821 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
822 && record__threads_enabled(rec)) {
823 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
824 return -EINVAL;
825 }
826
827 if (!rec->itr) {
828 rec->itr = auxtrace_record__init(rec->evlist, &err);
829 if (err)
830 return err;
831 }
832
833 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
834 rec->opts.auxtrace_snapshot_opts);
835 if (err)
836 return err;
837
838 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
839 rec->opts.auxtrace_sample_opts);
840 if (err)
841 return err;
842
843 auxtrace_regroup_aux_output(rec->evlist);
844
845 return auxtrace_parse_filters(rec->evlist);
846}
847
848#else
849
850static inline
851int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
852 struct mmap *map __maybe_unused)
853{
854 return 0;
855}
856
857static inline
858void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
859 bool on_exit __maybe_unused)
860{
861}
862
863static inline
864int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
865{
866 return 0;
867}
868
869static inline
870int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
871{
872 return 0;
873}
874
875static int record__auxtrace_init(struct record *rec __maybe_unused)
876{
877 return 0;
878}
879
880#endif
881
882static int record__config_text_poke(struct evlist *evlist)
883{
884 struct evsel *evsel;
885
886 /* Nothing to do if text poke is already configured */
887 evlist__for_each_entry(evlist, evsel) {
888 if (evsel->core.attr.text_poke)
889 return 0;
890 }
891
892 evsel = evlist__add_dummy_on_all_cpus(evlist);
893 if (!evsel)
894 return -ENOMEM;
895
896 evsel->core.attr.text_poke = 1;
897 evsel->core.attr.ksymbol = 1;
898 evsel->immediate = true;
899 evsel__set_sample_bit(evsel, TIME);
900
901 return 0;
902}
903
904static int record__config_off_cpu(struct record *rec)
905{
906 return off_cpu_prepare(evlist: rec->evlist, target: &rec->opts.target, opts: &rec->opts);
907}
908
909static bool record__tracking_system_wide(struct record *rec)
910{
911 struct evlist *evlist = rec->evlist;
912 struct evsel *evsel;
913
914 /*
915 * If non-dummy evsel exists, system_wide sideband is need to
916 * help parse sample information.
917 * For example, PERF_EVENT_MMAP event to help parse symbol,
918 * and PERF_EVENT_COMM event to help parse task executable name.
919 */
920 evlist__for_each_entry(evlist, evsel) {
921 if (!evsel__is_dummy_event(evsel))
922 return true;
923 }
924
925 return false;
926}
927
928static int record__config_tracking_events(struct record *rec)
929{
930 struct record_opts *opts = &rec->opts;
931 struct evlist *evlist = rec->evlist;
932 bool system_wide = false;
933 struct evsel *evsel;
934
935 /*
936 * For initial_delay, system wide or a hybrid system, we need to add
937 * tracking event so that we can track PERF_RECORD_MMAP to cover the
938 * delay of waiting or event synthesis.
939 */
940 if (opts->target.initial_delay || target__has_cpu(target: &opts->target) ||
941 perf_pmus__num_core_pmus() > 1) {
942
943 /*
944 * User space tasks can migrate between CPUs, so when tracing
945 * selected CPUs, sideband for all CPUs is still needed.
946 */
947 if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
948 system_wide = true;
949
950 evsel = evlist__findnew_tracking_event(evlist, system_wide);
951 if (!evsel)
952 return -ENOMEM;
953
954 /*
955 * Enable the tracking event when the process is forked for
956 * initial_delay, immediately for system wide.
957 */
958 if (opts->target.initial_delay && !evsel->immediate &&
959 !target__has_cpu(target: &opts->target))
960 evsel->core.attr.enable_on_exec = 1;
961 else
962 evsel->immediate = 1;
963 }
964
965 return 0;
966}
967
968static bool record__kcore_readable(struct machine *machine)
969{
970 char kcore[PATH_MAX];
971 int fd;
972
973 scnprintf(buf: kcore, size: sizeof(kcore), fmt: "%s/proc/kcore", machine->root_dir);
974
975 fd = open(kcore, O_RDONLY);
976 if (fd < 0)
977 return false;
978
979 close(fd);
980
981 return true;
982}
983
984static int record__kcore_copy(struct machine *machine, struct perf_data *data)
985{
986 char from_dir[PATH_MAX];
987 char kcore_dir[PATH_MAX];
988 int ret;
989
990 snprintf(buf: from_dir, size: sizeof(from_dir), fmt: "%s/proc", machine->root_dir);
991
992 ret = perf_data__make_kcore_dir(data, buf: kcore_dir, buf_sz: sizeof(kcore_dir));
993 if (ret)
994 return ret;
995
996 return kcore_copy(from_dir, to_dir: kcore_dir);
997}
998
999static void record__thread_data_init_pipes(struct record_thread *thread_data)
1000{
1001 thread_data->pipes.msg[0] = -1;
1002 thread_data->pipes.msg[1] = -1;
1003 thread_data->pipes.ack[0] = -1;
1004 thread_data->pipes.ack[1] = -1;
1005}
1006
1007static int record__thread_data_open_pipes(struct record_thread *thread_data)
1008{
1009 if (pipe(thread_data->pipes.msg))
1010 return -EINVAL;
1011
1012 if (pipe(thread_data->pipes.ack)) {
1013 close(thread_data->pipes.msg[0]);
1014 thread_data->pipes.msg[0] = -1;
1015 close(thread_data->pipes.msg[1]);
1016 thread_data->pipes.msg[1] = -1;
1017 return -EINVAL;
1018 }
1019
1020 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1021 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1022 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1023
1024 return 0;
1025}
1026
1027static void record__thread_data_close_pipes(struct record_thread *thread_data)
1028{
1029 if (thread_data->pipes.msg[0] != -1) {
1030 close(thread_data->pipes.msg[0]);
1031 thread_data->pipes.msg[0] = -1;
1032 }
1033 if (thread_data->pipes.msg[1] != -1) {
1034 close(thread_data->pipes.msg[1]);
1035 thread_data->pipes.msg[1] = -1;
1036 }
1037 if (thread_data->pipes.ack[0] != -1) {
1038 close(thread_data->pipes.ack[0]);
1039 thread_data->pipes.ack[0] = -1;
1040 }
1041 if (thread_data->pipes.ack[1] != -1) {
1042 close(thread_data->pipes.ack[1]);
1043 thread_data->pipes.ack[1] = -1;
1044 }
1045}
1046
1047static bool evlist__per_thread(struct evlist *evlist)
1048{
1049 return cpu_map__is_dummy(cpus: evlist->core.user_requested_cpus);
1050}
1051
1052static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1053{
1054 int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1055 struct mmap *mmap = evlist->mmap;
1056 struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1057 struct perf_cpu_map *cpus = evlist->core.all_cpus;
1058 bool per_thread = evlist__per_thread(evlist);
1059
1060 if (per_thread)
1061 thread_data->nr_mmaps = nr_mmaps;
1062 else
1063 thread_data->nr_mmaps = bitmap_weight(src: thread_data->mask->maps.bits,
1064 nbits: thread_data->mask->maps.nbits);
1065 if (mmap) {
1066 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1067 if (!thread_data->maps)
1068 return -ENOMEM;
1069 }
1070 if (overwrite_mmap) {
1071 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1072 if (!thread_data->overwrite_maps) {
1073 zfree(&thread_data->maps);
1074 return -ENOMEM;
1075 }
1076 }
1077 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1078 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1079
1080 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1081 if (per_thread ||
1082 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1083 if (thread_data->maps) {
1084 thread_data->maps[tm] = &mmap[m];
1085 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1086 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1087 }
1088 if (thread_data->overwrite_maps) {
1089 thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1090 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1091 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1092 }
1093 tm++;
1094 }
1095 }
1096
1097 return 0;
1098}
1099
1100static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1101{
1102 int f, tm, pos;
1103 struct mmap *map, *overwrite_map;
1104
1105 fdarray__init(&thread_data->pollfd, 64);
1106
1107 for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1108 map = thread_data->maps ? thread_data->maps[tm] : NULL;
1109 overwrite_map = thread_data->overwrite_maps ?
1110 thread_data->overwrite_maps[tm] : NULL;
1111
1112 for (f = 0; f < evlist->core.pollfd.nr; f++) {
1113 void *ptr = evlist->core.pollfd.priv[f].ptr;
1114
1115 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1116 pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1117 &evlist->core.pollfd);
1118 if (pos < 0)
1119 return pos;
1120 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1121 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1122 }
1123 }
1124 }
1125
1126 return 0;
1127}
1128
1129static void record__free_thread_data(struct record *rec)
1130{
1131 int t;
1132 struct record_thread *thread_data = rec->thread_data;
1133
1134 if (thread_data == NULL)
1135 return;
1136
1137 for (t = 0; t < rec->nr_threads; t++) {
1138 record__thread_data_close_pipes(thread_data: &thread_data[t]);
1139 zfree(&thread_data[t].maps);
1140 zfree(&thread_data[t].overwrite_maps);
1141 fdarray__exit(&thread_data[t].pollfd);
1142 }
1143
1144 zfree(&rec->thread_data);
1145}
1146
1147static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1148 int evlist_pollfd_index,
1149 int thread_pollfd_index)
1150{
1151 size_t x = rec->index_map_cnt;
1152
1153 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1154 return -ENOMEM;
1155 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1156 rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1157 rec->index_map_cnt += 1;
1158 return 0;
1159}
1160
1161static int record__update_evlist_pollfd_from_thread(struct record *rec,
1162 struct evlist *evlist,
1163 struct record_thread *thread_data)
1164{
1165 struct pollfd *e_entries = evlist->core.pollfd.entries;
1166 struct pollfd *t_entries = thread_data->pollfd.entries;
1167 int err = 0;
1168 size_t i;
1169
1170 for (i = 0; i < rec->index_map_cnt; i++) {
1171 int e_pos = rec->index_map[i].evlist_pollfd_index;
1172 int t_pos = rec->index_map[i].thread_pollfd_index;
1173
1174 if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1175 e_entries[e_pos].events != t_entries[t_pos].events) {
1176 pr_err("Thread and evlist pollfd index mismatch\n");
1177 err = -EINVAL;
1178 continue;
1179 }
1180 e_entries[e_pos].revents = t_entries[t_pos].revents;
1181 }
1182 return err;
1183}
1184
1185static int record__dup_non_perf_events(struct record *rec,
1186 struct evlist *evlist,
1187 struct record_thread *thread_data)
1188{
1189 struct fdarray *fda = &evlist->core.pollfd;
1190 int i, ret;
1191
1192 for (i = 0; i < fda->nr; i++) {
1193 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1194 continue;
1195 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1196 if (ret < 0) {
1197 pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1198 return ret;
1199 }
1200 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1201 thread_data, ret, fda->entries[i].fd);
1202 ret = record__map_thread_evlist_pollfd_indexes(rec, evlist_pollfd_index: i, thread_pollfd_index: ret);
1203 if (ret < 0) {
1204 pr_err("Failed to map thread and evlist pollfd indexes\n");
1205 return ret;
1206 }
1207 }
1208 return 0;
1209}
1210
1211static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1212{
1213 int t, ret;
1214 struct record_thread *thread_data;
1215
1216 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1217 if (!rec->thread_data) {
1218 pr_err("Failed to allocate thread data\n");
1219 return -ENOMEM;
1220 }
1221 thread_data = rec->thread_data;
1222
1223 for (t = 0; t < rec->nr_threads; t++)
1224 record__thread_data_init_pipes(thread_data: &thread_data[t]);
1225
1226 for (t = 0; t < rec->nr_threads; t++) {
1227 thread_data[t].rec = rec;
1228 thread_data[t].mask = &rec->thread_masks[t];
1229 ret = record__thread_data_init_maps(thread_data: &thread_data[t], evlist);
1230 if (ret) {
1231 pr_err("Failed to initialize thread[%d] maps\n", t);
1232 goto out_free;
1233 }
1234 ret = record__thread_data_init_pollfd(thread_data: &thread_data[t], evlist);
1235 if (ret) {
1236 pr_err("Failed to initialize thread[%d] pollfd\n", t);
1237 goto out_free;
1238 }
1239 if (t) {
1240 thread_data[t].tid = -1;
1241 ret = record__thread_data_open_pipes(thread_data: &thread_data[t]);
1242 if (ret) {
1243 pr_err("Failed to open thread[%d] communication pipes\n", t);
1244 goto out_free;
1245 }
1246 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1247 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1248 if (ret < 0) {
1249 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1250 goto out_free;
1251 }
1252 thread_data[t].ctlfd_pos = ret;
1253 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1254 thread_data, thread_data[t].ctlfd_pos,
1255 thread_data[t].pipes.msg[0]);
1256 } else {
1257 thread_data[t].tid = gettid();
1258
1259 ret = record__dup_non_perf_events(rec, evlist, thread_data: &thread_data[t]);
1260 if (ret < 0)
1261 goto out_free;
1262
1263 thread_data[t].ctlfd_pos = -1; /* Not used */
1264 }
1265 }
1266
1267 return 0;
1268
1269out_free:
1270 record__free_thread_data(rec);
1271
1272 return ret;
1273}
1274
1275static int record__mmap_evlist(struct record *rec,
1276 struct evlist *evlist)
1277{
1278 int i, ret;
1279 struct record_opts *opts = &rec->opts;
1280 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1281 opts->auxtrace_sample_mode;
1282 char msg[512];
1283
1284 if (opts->affinity != PERF_AFFINITY_SYS)
1285 cpu__setup_cpunode_map();
1286
1287 if (evlist__mmap_ex(evlist, pages: opts->mmap_pages,
1288 auxtrace_pages: opts->auxtrace_mmap_pages,
1289 auxtrace_overwrite,
1290 nr_cblocks: opts->nr_cblocks, affinity: opts->affinity,
1291 flush: opts->mmap_flush, comp_level: opts->comp_level) < 0) {
1292 if (errno == EPERM) {
1293 pr_err("Permission error mapping pages.\n"
1294 "Consider increasing "
1295 "/proc/sys/kernel/perf_event_mlock_kb,\n"
1296 "or try again with a smaller value of -m/--mmap_pages.\n"
1297 "(current value: %u,%u)\n",
1298 opts->mmap_pages, opts->auxtrace_mmap_pages);
1299 return -errno;
1300 } else {
1301 pr_err("failed to mmap with %d (%s)\n", errno,
1302 str_error_r(errno, msg, sizeof(msg)));
1303 if (errno)
1304 return -errno;
1305 else
1306 return -EINVAL;
1307 }
1308 }
1309
1310 if (evlist__initialize_ctlfd(evlist, ctl_fd: opts->ctl_fd, ctl_fd_ack: opts->ctl_fd_ack))
1311 return -1;
1312
1313 ret = record__alloc_thread_data(rec, evlist);
1314 if (ret)
1315 return ret;
1316
1317 if (record__threads_enabled(rec)) {
1318 ret = perf_data__create_dir(data: &rec->data, nr: evlist->core.nr_mmaps);
1319 if (ret) {
1320 pr_err("Failed to create data directory: %s\n", strerror(-ret));
1321 return ret;
1322 }
1323 for (i = 0; i < evlist->core.nr_mmaps; i++) {
1324 if (evlist->mmap)
1325 evlist->mmap[i].file = &rec->data.dir.files[i];
1326 if (evlist->overwrite_mmap)
1327 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1328 }
1329 }
1330
1331 return 0;
1332}
1333
1334static int record__mmap(struct record *rec)
1335{
1336 return record__mmap_evlist(rec, evlist: rec->evlist);
1337}
1338
1339static int record__open(struct record *rec)
1340{
1341 char msg[BUFSIZ];
1342 struct evsel *pos;
1343 struct evlist *evlist = rec->evlist;
1344 struct perf_session *session = rec->session;
1345 struct record_opts *opts = &rec->opts;
1346 int rc = 0;
1347
1348 evlist__config(evlist, opts, callchain: &callchain_param);
1349
1350 evlist__for_each_entry(evlist, pos) {
1351try_again:
1352 if (evsel__open(evsel: pos, cpus: pos->core.cpus, threads: pos->core.threads) < 0) {
1353 if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1354 if (verbose > 0)
1355 ui__warning(format: "%s\n", msg);
1356 goto try_again;
1357 }
1358 if ((errno == EINVAL || errno == EBADF) &&
1359 pos->core.leader != &pos->core &&
1360 pos->weak_group) {
1361 pos = evlist__reset_weak_group(evlist, evsel: pos, close: true);
1362 goto try_again;
1363 }
1364 rc = -errno;
1365 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1366 ui__error(format: "%s\n", msg);
1367 goto out;
1368 }
1369
1370 pos->supported = true;
1371 }
1372
1373 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1374 pr_warning(
1375"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1376"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1377"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1378"file is not found in the buildid cache or in the vmlinux path.\n\n"
1379"Samples in kernel modules won't be resolved at all.\n\n"
1380"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1381"even with a suitable vmlinux or kallsyms file.\n\n");
1382 }
1383
1384 if (evlist__apply_filters(evlist, err_evsel: &pos)) {
1385 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1386 pos->filter ?: "BPF", evsel__name(pos), errno,
1387 str_error_r(errno, msg, sizeof(msg)));
1388 rc = -1;
1389 goto out;
1390 }
1391
1392 rc = record__mmap(rec);
1393 if (rc)
1394 goto out;
1395
1396 session->evlist = evlist;
1397 perf_session__set_id_hdr_size(session);
1398out:
1399 return rc;
1400}
1401
1402static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1403{
1404 if (rec->evlist->first_sample_time == 0)
1405 rec->evlist->first_sample_time = sample_time;
1406
1407 if (sample_time)
1408 rec->evlist->last_sample_time = sample_time;
1409}
1410
1411static int process_sample_event(struct perf_tool *tool,
1412 union perf_event *event,
1413 struct perf_sample *sample,
1414 struct evsel *evsel,
1415 struct machine *machine)
1416{
1417 struct record *rec = container_of(tool, struct record, tool);
1418
1419 set_timestamp_boundary(rec, sample_time: sample->time);
1420
1421 if (rec->buildid_all)
1422 return 0;
1423
1424 rec->samples++;
1425 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1426}
1427
1428static int process_buildids(struct record *rec)
1429{
1430 struct perf_session *session = rec->session;
1431
1432 if (perf_data__size(data: &rec->data) == 0)
1433 return 0;
1434
1435 /*
1436 * During this process, it'll load kernel map and replace the
1437 * dso->long_name to a real pathname it found. In this case
1438 * we prefer the vmlinux path like
1439 * /lib/modules/3.16.4/build/vmlinux
1440 *
1441 * rather than build-id path (in debug directory).
1442 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1443 */
1444 symbol_conf.ignore_vmlinux_buildid = true;
1445
1446 /*
1447 * If --buildid-all is given, it marks all DSO regardless of hits,
1448 * so no need to process samples. But if timestamp_boundary is enabled,
1449 * it still needs to walk on all samples to get the timestamps of
1450 * first/last samples.
1451 */
1452 if (rec->buildid_all && !rec->timestamp_boundary)
1453 rec->tool.sample = NULL;
1454
1455 return perf_session__process_events(session);
1456}
1457
1458static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1459{
1460 int err;
1461 struct perf_tool *tool = data;
1462 /*
1463 *As for guest kernel when processing subcommand record&report,
1464 *we arrange module mmap prior to guest kernel mmap and trigger
1465 *a preload dso because default guest module symbols are loaded
1466 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
1467 *method is used to avoid symbol missing when the first addr is
1468 *in module instead of in guest kernel.
1469 */
1470 err = perf_event__synthesize_modules(tool, process: process_synthesized_event,
1471 machine);
1472 if (err < 0)
1473 pr_err("Couldn't record guest kernel [%d]'s reference"
1474 " relocation symbol.\n", machine->pid);
1475
1476 /*
1477 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1478 * have no _text sometimes.
1479 */
1480 err = perf_event__synthesize_kernel_mmap(tool, process: process_synthesized_event,
1481 machine);
1482 if (err < 0)
1483 pr_err("Couldn't record guest kernel [%d]'s reference"
1484 " relocation symbol.\n", machine->pid);
1485}
1486
1487static struct perf_event_header finished_round_event = {
1488 .size = sizeof(struct perf_event_header),
1489 .type = PERF_RECORD_FINISHED_ROUND,
1490};
1491
1492static struct perf_event_header finished_init_event = {
1493 .size = sizeof(struct perf_event_header),
1494 .type = PERF_RECORD_FINISHED_INIT,
1495};
1496
1497static void record__adjust_affinity(struct record *rec, struct mmap *map)
1498{
1499 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1500 !bitmap_equal(src1: thread->mask->affinity.bits, src2: map->affinity_mask.bits,
1501 nbits: thread->mask->affinity.nbits)) {
1502 bitmap_zero(dst: thread->mask->affinity.bits, nbits: thread->mask->affinity.nbits);
1503 bitmap_or(dst: thread->mask->affinity.bits, src1: thread->mask->affinity.bits,
1504 src2: map->affinity_mask.bits, nbits: thread->mask->affinity.nbits);
1505 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1506 (cpu_set_t *)thread->mask->affinity.bits);
1507 if (verbose == 2) {
1508 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1509 mmap_cpu_mask__scnprintf(mask: &thread->mask->affinity, tag: "affinity");
1510 }
1511 }
1512}
1513
1514static size_t process_comp_header(void *record, size_t increment)
1515{
1516 struct perf_record_compressed *event = record;
1517 size_t size = sizeof(*event);
1518
1519 if (increment) {
1520 event->header.size += increment;
1521 return increment;
1522 }
1523
1524 event->header.type = PERF_RECORD_COMPRESSED;
1525 event->header.size = size;
1526
1527 return size;
1528}
1529
1530static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1531 void *dst, size_t dst_size, void *src, size_t src_size)
1532{
1533 size_t compressed;
1534 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1535 struct zstd_data *zstd_data = &session->zstd_data;
1536
1537 if (map && map->file)
1538 zstd_data = &map->zstd_data;
1539
1540 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1541 max_record_size, process_comp_header);
1542
1543 if (map && map->file) {
1544 thread->bytes_transferred += src_size;
1545 thread->bytes_compressed += compressed;
1546 } else {
1547 session->bytes_transferred += src_size;
1548 session->bytes_compressed += compressed;
1549 }
1550
1551 return compressed;
1552}
1553
1554static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1555 bool overwrite, bool synch)
1556{
1557 u64 bytes_written = rec->bytes_written;
1558 int i;
1559 int rc = 0;
1560 int nr_mmaps;
1561 struct mmap **maps;
1562 int trace_fd = rec->data.file.fd;
1563 off_t off = 0;
1564
1565 if (!evlist)
1566 return 0;
1567
1568 nr_mmaps = thread->nr_mmaps;
1569 maps = overwrite ? thread->overwrite_maps : thread->maps;
1570
1571 if (!maps)
1572 return 0;
1573
1574 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1575 return 0;
1576
1577 if (record__aio_enabled(rec))
1578 off = record__aio_get_pos(trace_fd);
1579
1580 for (i = 0; i < nr_mmaps; i++) {
1581 u64 flush = 0;
1582 struct mmap *map = maps[i];
1583
1584 if (map->core.base) {
1585 record__adjust_affinity(rec, map);
1586 if (synch) {
1587 flush = map->core.flush;
1588 map->core.flush = 1;
1589 }
1590 if (!record__aio_enabled(rec)) {
1591 if (perf_mmap__push(md: map, to: rec, push: record__pushfn) < 0) {
1592 if (synch)
1593 map->core.flush = flush;
1594 rc = -1;
1595 goto out;
1596 }
1597 } else {
1598 if (record__aio_push(rec, map, off: &off) < 0) {
1599 record__aio_set_pos(trace_fd, pos: off);
1600 if (synch)
1601 map->core.flush = flush;
1602 rc = -1;
1603 goto out;
1604 }
1605 }
1606 if (synch)
1607 map->core.flush = flush;
1608 }
1609
1610 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1611 !rec->opts.auxtrace_sample_mode &&
1612 record__auxtrace_mmap_read(rec, map) != 0) {
1613 rc = -1;
1614 goto out;
1615 }
1616 }
1617
1618 if (record__aio_enabled(rec))
1619 record__aio_set_pos(trace_fd, pos: off);
1620
1621 /*
1622 * Mark the round finished in case we wrote
1623 * at least one event.
1624 *
1625 * No need for round events in directory mode,
1626 * because per-cpu maps and files have data
1627 * sorted by kernel.
1628 */
1629 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1630 rc = record__write(rec, NULL, bf: &finished_round_event, size: sizeof(finished_round_event));
1631
1632 if (overwrite)
1633 evlist__toggle_bkw_mmap(evlist, state: BKW_MMAP_EMPTY);
1634out:
1635 return rc;
1636}
1637
1638static int record__mmap_read_all(struct record *rec, bool synch)
1639{
1640 int err;
1641
1642 err = record__mmap_read_evlist(rec, evlist: rec->evlist, overwrite: false, synch);
1643 if (err)
1644 return err;
1645
1646 return record__mmap_read_evlist(rec, evlist: rec->evlist, overwrite: true, synch);
1647}
1648
1649static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1650 void *arg __maybe_unused)
1651{
1652 struct perf_mmap *map = fda->priv[fd].ptr;
1653
1654 if (map)
1655 perf_mmap__put(map);
1656}
1657
1658static void *record__thread(void *arg)
1659{
1660 enum thread_msg msg = THREAD_MSG__READY;
1661 bool terminate = false;
1662 struct fdarray *pollfd;
1663 int err, ctlfd_pos;
1664
1665 thread = arg;
1666 thread->tid = gettid();
1667
1668 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1669 if (err == -1)
1670 pr_warning("threads[%d]: failed to notify on start: %s\n",
1671 thread->tid, strerror(errno));
1672
1673 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1674
1675 pollfd = &thread->pollfd;
1676 ctlfd_pos = thread->ctlfd_pos;
1677
1678 for (;;) {
1679 unsigned long long hits = thread->samples;
1680
1681 if (record__mmap_read_all(rec: thread->rec, synch: false) < 0 || terminate)
1682 break;
1683
1684 if (hits == thread->samples) {
1685
1686 err = fdarray__poll(pollfd, -1);
1687 /*
1688 * Propagate error, only if there's any. Ignore positive
1689 * number of returned events and interrupt error.
1690 */
1691 if (err > 0 || (err < 0 && errno == EINTR))
1692 err = 0;
1693 thread->waking++;
1694
1695 if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1696 record__thread_munmap_filtered, NULL) == 0)
1697 break;
1698 }
1699
1700 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1701 terminate = true;
1702 close(thread->pipes.msg[0]);
1703 thread->pipes.msg[0] = -1;
1704 pollfd->entries[ctlfd_pos].fd = -1;
1705 pollfd->entries[ctlfd_pos].events = 0;
1706 }
1707
1708 pollfd->entries[ctlfd_pos].revents = 0;
1709 }
1710 record__mmap_read_all(rec: thread->rec, synch: true);
1711
1712 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1713 if (err == -1)
1714 pr_warning("threads[%d]: failed to notify on termination: %s\n",
1715 thread->tid, strerror(errno));
1716
1717 return NULL;
1718}
1719
1720static void record__init_features(struct record *rec)
1721{
1722 struct perf_session *session = rec->session;
1723 int feat;
1724
1725 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1726 perf_header__set_feat(header: &session->header, feat);
1727
1728 if (rec->no_buildid)
1729 perf_header__clear_feat(header: &session->header, feat: HEADER_BUILD_ID);
1730
1731#ifdef HAVE_LIBTRACEEVENT
1732 if (!have_tracepoints(&rec->evlist->core.entries))
1733 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1734#endif
1735
1736 if (!rec->opts.branch_stack)
1737 perf_header__clear_feat(header: &session->header, feat: HEADER_BRANCH_STACK);
1738
1739 if (!rec->opts.full_auxtrace)
1740 perf_header__clear_feat(header: &session->header, feat: HEADER_AUXTRACE);
1741
1742 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1743 perf_header__clear_feat(header: &session->header, feat: HEADER_CLOCKID);
1744
1745 if (!rec->opts.use_clockid)
1746 perf_header__clear_feat(header: &session->header, feat: HEADER_CLOCK_DATA);
1747
1748 if (!record__threads_enabled(rec))
1749 perf_header__clear_feat(header: &session->header, feat: HEADER_DIR_FORMAT);
1750
1751 if (!record__comp_enabled(rec))
1752 perf_header__clear_feat(header: &session->header, feat: HEADER_COMPRESSED);
1753
1754 perf_header__clear_feat(header: &session->header, feat: HEADER_STAT);
1755}
1756
1757static void
1758record__finish_output(struct record *rec)
1759{
1760 int i;
1761 struct perf_data *data = &rec->data;
1762 int fd = perf_data__fd(data);
1763
1764 if (data->is_pipe)
1765 return;
1766
1767 rec->session->header.data_size += rec->bytes_written;
1768 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1769 if (record__threads_enabled(rec)) {
1770 for (i = 0; i < data->dir.nr; i++)
1771 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1772 }
1773
1774 if (!rec->no_buildid) {
1775 process_buildids(rec);
1776
1777 if (rec->buildid_all)
1778 dsos__hit_all(session: rec->session);
1779 }
1780 perf_session__write_header(session: rec->session, evlist: rec->evlist, fd, at_exit: true);
1781
1782 return;
1783}
1784
1785static int record__synthesize_workload(struct record *rec, bool tail)
1786{
1787 int err;
1788 struct perf_thread_map *thread_map;
1789 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1790
1791 if (rec->opts.tail_synthesize != tail)
1792 return 0;
1793
1794 thread_map = thread_map__new_by_tid(tid: rec->evlist->workload.pid);
1795 if (thread_map == NULL)
1796 return -1;
1797
1798 err = perf_event__synthesize_thread_map(tool: &rec->tool, threads: thread_map,
1799 process: process_synthesized_event,
1800 machine: &rec->session->machines.host,
1801 needs_mmap,
1802 mmap_data: rec->opts.sample_address);
1803 perf_thread_map__put(thread_map);
1804 return err;
1805}
1806
1807static int write_finished_init(struct record *rec, bool tail)
1808{
1809 if (rec->opts.tail_synthesize != tail)
1810 return 0;
1811
1812 return record__write(rec, NULL, bf: &finished_init_event, size: sizeof(finished_init_event));
1813}
1814
1815static int record__synthesize(struct record *rec, bool tail);
1816
1817static int
1818record__switch_output(struct record *rec, bool at_exit)
1819{
1820 struct perf_data *data = &rec->data;
1821 int fd, err;
1822 char *new_filename;
1823
1824 /* Same Size: "2015122520103046"*/
1825 char timestamp[] = "InvalidTimestamp";
1826
1827 record__aio_mmap_read_sync(rec);
1828
1829 write_finished_init(rec, tail: true);
1830
1831 record__synthesize(rec, tail: true);
1832 if (target__none(target: &rec->opts.target))
1833 record__synthesize_workload(rec, tail: true);
1834
1835 rec->samples = 0;
1836 record__finish_output(rec);
1837 err = fetch_current_timestamp(buf: timestamp, sz: sizeof(timestamp));
1838 if (err) {
1839 pr_err("Failed to get current timestamp\n");
1840 return -EINVAL;
1841 }
1842
1843 fd = perf_data__switch(data, postfix: timestamp,
1844 pos: rec->session->header.data_offset,
1845 at_exit, new_filepath: &new_filename);
1846 if (fd >= 0 && !at_exit) {
1847 rec->bytes_written = 0;
1848 rec->session->header.data_size = 0;
1849 }
1850
1851 if (!quiet)
1852 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1853 data->path, timestamp);
1854
1855 if (rec->switch_output.num_files) {
1856 int n = rec->switch_output.cur_file + 1;
1857
1858 if (n >= rec->switch_output.num_files)
1859 n = 0;
1860 rec->switch_output.cur_file = n;
1861 if (rec->switch_output.filenames[n]) {
1862 remove(rec->switch_output.filenames[n]);
1863 zfree(&rec->switch_output.filenames[n]);
1864 }
1865 rec->switch_output.filenames[n] = new_filename;
1866 } else {
1867 free(new_filename);
1868 }
1869
1870 /* Output tracking events */
1871 if (!at_exit) {
1872 record__synthesize(rec, tail: false);
1873
1874 /*
1875 * In 'perf record --switch-output' without -a,
1876 * record__synthesize() in record__switch_output() won't
1877 * generate tracking events because there's no thread_map
1878 * in evlist. Which causes newly created perf.data doesn't
1879 * contain map and comm information.
1880 * Create a fake thread_map and directly call
1881 * perf_event__synthesize_thread_map() for those events.
1882 */
1883 if (target__none(target: &rec->opts.target))
1884 record__synthesize_workload(rec, tail: false);
1885 write_finished_init(rec, tail: false);
1886 }
1887 return fd;
1888}
1889
1890static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1891 struct perf_record_lost_samples *lost,
1892 int cpu_idx, int thread_idx, u64 lost_count,
1893 u16 misc_flag)
1894{
1895 struct perf_sample_id *sid;
1896 struct perf_sample sample = {};
1897 int id_hdr_size;
1898
1899 lost->lost = lost_count;
1900 if (evsel->core.ids) {
1901 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1902 sample.id = sid->id;
1903 }
1904
1905 id_hdr_size = perf_event__synthesize_id_sample(array: (void *)(lost + 1),
1906 type: evsel->core.attr.sample_type, sample: &sample);
1907 lost->header.size = sizeof(*lost) + id_hdr_size;
1908 lost->header.misc = misc_flag;
1909 record__write(rec, NULL, bf: lost, size: lost->header.size);
1910}
1911
1912static void record__read_lost_samples(struct record *rec)
1913{
1914 struct perf_session *session = rec->session;
1915 struct perf_record_lost_samples *lost;
1916 struct evsel *evsel;
1917
1918 /* there was an error during record__open */
1919 if (session->evlist == NULL)
1920 return;
1921
1922 lost = zalloc(PERF_SAMPLE_MAX_SIZE);
1923 if (lost == NULL) {
1924 pr_debug("Memory allocation failed\n");
1925 return;
1926 }
1927
1928 lost->header.type = PERF_RECORD_LOST_SAMPLES;
1929
1930 evlist__for_each_entry(session->evlist, evsel) {
1931 struct xyarray *xy = evsel->core.sample_id;
1932 u64 lost_count;
1933
1934 if (xy == NULL || evsel->core.fd == NULL)
1935 continue;
1936 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1937 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1938 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1939 continue;
1940 }
1941
1942 for (int x = 0; x < xyarray__max_x(xy); x++) {
1943 for (int y = 0; y < xyarray__max_y(xy); y++) {
1944 struct perf_counts_values count;
1945
1946 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1947 pr_debug("read LOST count failed\n");
1948 goto out;
1949 }
1950
1951 if (count.lost) {
1952 __record__save_lost_samples(rec, evsel, lost,
1953 cpu_idx: x, thread_idx: y, lost_count: count.lost, misc_flag: 0);
1954 }
1955 }
1956 }
1957
1958 lost_count = perf_bpf_filter__lost_count(evsel);
1959 if (lost_count)
1960 __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
1961 PERF_RECORD_MISC_LOST_SAMPLES_BPF);
1962 }
1963out:
1964 free(lost);
1965}
1966
1967static volatile sig_atomic_t workload_exec_errno;
1968
1969/*
1970 * evlist__prepare_workload will send a SIGUSR1
1971 * if the fork fails, since we asked by setting its
1972 * want_signal to true.
1973 */
1974static void workload_exec_failed_signal(int signo __maybe_unused,
1975 siginfo_t *info,
1976 void *ucontext __maybe_unused)
1977{
1978 workload_exec_errno = info->si_value.sival_int;
1979 done = 1;
1980 child_finished = 1;
1981}
1982
1983static void snapshot_sig_handler(int sig);
1984static void alarm_sig_handler(int sig);
1985
1986static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1987{
1988 if (evlist) {
1989 if (evlist->mmap && evlist->mmap[0].core.base)
1990 return evlist->mmap[0].core.base;
1991 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1992 return evlist->overwrite_mmap[0].core.base;
1993 }
1994 return NULL;
1995}
1996
1997static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1998{
1999 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2000 if (pc)
2001 return pc;
2002 return NULL;
2003}
2004
2005static int record__synthesize(struct record *rec, bool tail)
2006{
2007 struct perf_session *session = rec->session;
2008 struct machine *machine = &session->machines.host;
2009 struct perf_data *data = &rec->data;
2010 struct record_opts *opts = &rec->opts;
2011 struct perf_tool *tool = &rec->tool;
2012 int err = 0;
2013 event_op f = process_synthesized_event;
2014
2015 if (rec->opts.tail_synthesize != tail)
2016 return 0;
2017
2018 if (data->is_pipe) {
2019 err = perf_event__synthesize_for_pipe(tool, session, data,
2020 process_synthesized_event);
2021 if (err < 0)
2022 goto out;
2023
2024 rec->bytes_written += err;
2025 }
2026
2027 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2028 process_synthesized_event, machine);
2029 if (err)
2030 goto out;
2031
2032 /* Synthesize id_index before auxtrace_info */
2033 err = perf_event__synthesize_id_index(tool,
2034 process_synthesized_event,
2035 session->evlist, machine);
2036 if (err)
2037 goto out;
2038
2039 if (rec->opts.full_auxtrace) {
2040 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2041 session, process_synthesized_event);
2042 if (err)
2043 goto out;
2044 }
2045
2046 if (!evlist__exclude_kernel(rec->evlist)) {
2047 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2048 machine);
2049 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2050 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2051 "Check /proc/kallsyms permission or run as root.\n");
2052
2053 err = perf_event__synthesize_modules(tool, process_synthesized_event,
2054 machine);
2055 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2056 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2057 "Check /proc/modules permission or run as root.\n");
2058 }
2059
2060 if (perf_guest) {
2061 machines__process_guests(&session->machines,
2062 perf_event__synthesize_guest_os, tool);
2063 }
2064
2065 err = perf_event__synthesize_extra_attr(&rec->tool,
2066 rec->evlist,
2067 process_synthesized_event,
2068 data->is_pipe);
2069 if (err)
2070 goto out;
2071
2072 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2073 process_synthesized_event,
2074 NULL);
2075 if (err < 0) {
2076 pr_err("Couldn't synthesize thread map.\n");
2077 return err;
2078 }
2079
2080 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2081 process_synthesized_event, NULL);
2082 if (err < 0) {
2083 pr_err("Couldn't synthesize cpu map.\n");
2084 return err;
2085 }
2086
2087 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2088 machine, opts);
2089 if (err < 0) {
2090 pr_warning("Couldn't synthesize bpf events.\n");
2091 err = 0;
2092 }
2093
2094 if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2095 err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2096 machine);
2097 if (err < 0) {
2098 pr_warning("Couldn't synthesize cgroup events.\n");
2099 err = 0;
2100 }
2101 }
2102
2103 if (rec->opts.nr_threads_synthesize > 1) {
2104 mutex_init(&synth_lock);
2105 perf_set_multithreaded();
2106 f = process_locked_synthesized_event;
2107 }
2108
2109 if (rec->opts.synth & PERF_SYNTH_TASK) {
2110 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2111
2112 err = __machine__synthesize_threads(machine, tool, &opts->target,
2113 rec->evlist->core.threads,
2114 f, needs_mmap, opts->sample_address,
2115 rec->opts.nr_threads_synthesize);
2116 }
2117
2118 if (rec->opts.nr_threads_synthesize > 1) {
2119 perf_set_singlethreaded();
2120 mutex_destroy(&synth_lock);
2121 }
2122
2123out:
2124 return err;
2125}
2126
2127static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2128{
2129 struct record *rec = data;
2130 pthread_kill(rec->thread_id, SIGUSR2);
2131 return 0;
2132}
2133
2134static int record__setup_sb_evlist(struct record *rec)
2135{
2136 struct record_opts *opts = &rec->opts;
2137
2138 if (rec->sb_evlist != NULL) {
2139 /*
2140 * We get here if --switch-output-event populated the
2141 * sb_evlist, so associate a callback that will send a SIGUSR2
2142 * to the main thread.
2143 */
2144 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2145 rec->thread_id = pthread_self();
2146 }
2147#ifdef HAVE_LIBBPF_SUPPORT
2148 if (!opts->no_bpf_event) {
2149 if (rec->sb_evlist == NULL) {
2150 rec->sb_evlist = evlist__new();
2151
2152 if (rec->sb_evlist == NULL) {
2153 pr_err("Couldn't create side band evlist.\n.");
2154 return -1;
2155 }
2156 }
2157
2158 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
2159 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2160 return -1;
2161 }
2162 }
2163#endif
2164 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2165 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2166 opts->no_bpf_event = true;
2167 }
2168
2169 return 0;
2170}
2171
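/*
 * Store the selected clockid and a pair of reference timestamps (TOD and the
 * chosen clock) in the header, so reports can convert sample times to
 * wall-clock time.
 */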
2172static int record__init_clock(struct record *rec)
2173{
2174 struct perf_session *session = rec->session;
2175 struct timespec ref_clockid;
2176 struct timeval ref_tod;
2177 u64 ref;
2178
2179 if (!rec->opts.use_clockid)
2180 return 0;
2181
2182 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2183 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
2184
2185 session->header.env.clock.clockid = rec->opts.clockid;
2186
2187 if (gettimeofday(&ref_tod, NULL) != 0) {
2188 pr_err("gettimeofday failed, cannot set reference time.\n");
2189 return -1;
2190 }
2191
2192 if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2193 pr_err("clock_gettime failed, cannot set reference time.\n");
2194 return -1;
2195 }
2196
2197 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2198 (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2199
2200 session->header.env.clock.tod_ns = ref;
2201
2202 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2203 (u64) ref_clockid.tv_nsec;
2204
2205 session->header.env.clock.clockid_ns = ref;
2206 return 0;
2207}
2208
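/*
 * Kick off an AUX area snapshot if the trigger is armed; the main loop picks
 * up auxtrace_record__snapshot_started and reads the snapshot data.
 */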
2209static void hit_auxtrace_snapshot_trigger(struct record *rec)
2210{
2211 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2212 trigger_hit(&auxtrace_snapshot_trigger);
2213 auxtrace_record__snapshot_started = 1;
2214 if (auxtrace_record__snapshot_start(rec->itr))
2215 trigger_error(&auxtrace_snapshot_trigger);
2216 }
2217}
2218
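/*
 * On systems with more than one core PMU, rewrite plain hybrid event names as
 * "pmu/event/" so the originating PMU is visible in the output.
 */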
2219static void record__uniquify_name(struct record *rec)
2220{
2221 struct evsel *pos;
2222 struct evlist *evlist = rec->evlist;
2223 char *new_name;
2224 int ret;
2225
2226 if (perf_pmus__num_core_pmus() == 1)
2227 return;
2228
2229 evlist__for_each_entry(evlist, pos) {
2230 if (!evsel__is_hybrid(pos))
2231 continue;
2232
2233 if (strchr(pos->name, '/'))
2234 continue;
2235
2236 ret = asprintf(&new_name, "%s/%s/",
2237 pos->pmu_name, pos->name);
2238 if (ret > 0) {
2239 free(pos->name);
2240 pos->name = new_name;
2241 }
2242 }
2243}
2244
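/*
 * Ask a worker thread to terminate by closing the write end of its message
 * pipe, then wait for the final acknowledgement on its ack pipe.
 */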
2245static int record__terminate_thread(struct record_thread *thread_data)
2246{
2247 int err;
2248 enum thread_msg ack = THREAD_MSG__UNDEFINED;
2249 pid_t tid = thread_data->tid;
2250
2251 close(thread_data->pipes.msg[1]);
2252 thread_data->pipes.msg[1] = -1;
2253 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2254 if (err > 0)
2255 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2256 else
2257 pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2258 thread->tid, tid);
2259
2260 return 0;
2261}
2262
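/*
 * Spawn the per-thread trace readers (thread_data[1..]), each pinned to its
 * affinity mask, with signals blocked while they start up; thread_data[0]
 * keeps running in the main thread.
 */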
2263static int record__start_threads(struct record *rec)
2264{
2265 int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2266 struct record_thread *thread_data = rec->thread_data;
2267 sigset_t full, mask;
2268 pthread_t handle;
2269 pthread_attr_t attrs;
2270
2271 thread = &thread_data[0];
2272
2273 if (!record__threads_enabled(rec))
2274 return 0;
2275
2276 sigfillset(&full);
2277 if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2278 pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2279 return -1;
2280 }
2281
2282 pthread_attr_init(&attrs);
2283 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2284
2285 for (t = 1; t < nr_threads; t++) {
2286 enum thread_msg msg = THREAD_MSG__UNDEFINED;
2287
2288#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2289 pthread_attr_setaffinity_np(&attrs,
2290 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2291 (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2292#endif
2293 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2294 for (tt = 1; tt < t; tt++)
2295 record__terminate_thread(&thread_data[tt]);
2296 pr_err("Failed to start threads: %s\n", strerror(errno));
2297 ret = -1;
2298 goto out_err;
2299 }
2300
2301 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2302 if (err > 0)
2303 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2304 thread_msg_tags[msg]);
2305 else
2306 pr_warning("threads[%d]: failed to receive start notification from %d\n",
2307 thread->tid, rec->thread_data[t].tid);
2308 }
2309
2310 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2311 (cpu_set_t *)thread->mask->affinity.bits);
2312
2313 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2314
2315out_err:
2316 pthread_attr_destroy(&attrs);
2317
2318 if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2319 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2320 ret = -1;
2321 }
2322
2323 return ret;
2324}
2325
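/*
 * Terminate all worker threads and fold their per-thread sample and byte
 * counters back into the record and session totals.
 */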
2326static int record__stop_threads(struct record *rec)
2327{
2328 int t;
2329 struct record_thread *thread_data = rec->thread_data;
2330
2331 for (t = 1; t < rec->nr_threads; t++)
2332 record__terminate_thread(&thread_data[t]);
2333
2334 for (t = 0; t < rec->nr_threads; t++) {
2335 rec->samples += thread_data[t].samples;
2336 if (!record__threads_enabled(rec))
2337 continue;
2338 rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2339 rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2340 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2341 thread_data[t].samples, thread_data[t].waking);
2342 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2343 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2344 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2345 else
2346 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2347 }
2348
2349 return 0;
2350}
2351
2352static unsigned long record__waking(struct record *rec)
2353{
2354 int t;
2355 unsigned long waking = 0;
2356 struct record_thread *thread_data = rec->thread_data;
2357
2358 for (t = 0; t < rec->nr_threads; t++)
2359 waking += thread_data[t].waking;
2360
2361 return waking;
2362}
2363
2364static int __cmd_record(struct record *rec, int argc, const char **argv)
2365{
2366 int err;
2367 int status = 0;
2368 const bool forks = argc > 0;
2369 struct perf_tool *tool = &rec->tool;
2370 struct record_opts *opts = &rec->opts;
2371 struct perf_data *data = &rec->data;
2372 struct perf_session *session;
2373 bool disabled = false, draining = false;
2374 int fd;
2375 float ratio = 0;
2376 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2377
2378 atexit(record__sig_exit);
2379 signal(SIGCHLD, sig_handler);
2380 signal(SIGINT, sig_handler);
2381 signal(SIGTERM, sig_handler);
2382 signal(SIGSEGV, sigsegv_handler);
2383
2384 if (rec->opts.record_namespaces)
2385 tool->namespace_events = true;
2386
2387 if (rec->opts.record_cgroup) {
2388#ifdef HAVE_FILE_HANDLE
2389 tool->cgroup_events = true;
2390#else
2391 pr_err("cgroup tracking is not supported\n");
2392 return -1;
2393#endif
2394 }
2395
2396 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2397 signal(SIGUSR2, snapshot_sig_handler);
2398 if (rec->opts.auxtrace_snapshot_mode)
2399 trigger_on(&auxtrace_snapshot_trigger);
2400 if (rec->switch_output.enabled)
2401 trigger_on(&switch_output_trigger);
2402 } else {
2403 signal(SIGUSR2, SIG_IGN);
2404 }
2405
2406 session = perf_session__new(data, tool);
2407 if (IS_ERR(session)) {
2408 pr_err("Perf session creation failed.\n");
2409 return PTR_ERR(session);
2410 }
2411
2412 if (record__threads_enabled(rec)) {
2413 if (perf_data__is_pipe(&rec->data)) {
2414 pr_err("Parallel trace streaming is not available in pipe mode.\n");
2415 return -1;
2416 }
2417 if (rec->opts.full_auxtrace) {
2418 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2419 return -1;
2420 }
2421 }
2422
2423 fd = perf_data__fd(data);
2424 rec->session = session;
2425
2426 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2427 pr_err("Compression initialization failed.\n");
2428 return -1;
2429 }
2430#ifdef HAVE_EVENTFD_SUPPORT
2431 done_fd = eventfd(0, EFD_NONBLOCK);
2432 if (done_fd < 0) {
2433 pr_err("Failed to create wakeup eventfd, error: %m\n");
2434 status = -1;
2435 goto out_delete_session;
2436 }
2437 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2438 if (err < 0) {
2439 pr_err("Failed to add wakeup eventfd to poll list\n");
2440 status = err;
2441 goto out_delete_session;
2442 }
2443#endif // HAVE_EVENTFD_SUPPORT
2444
2445 session->header.env.comp_type = PERF_COMP_ZSTD;
2446 session->header.env.comp_level = rec->opts.comp_level;
2447
2448 if (rec->opts.kcore &&
2449 !record__kcore_readable(&session->machines.host)) {
2450 pr_err("ERROR: kcore is not readable.\n");
2451 return -1;
2452 }
2453
2454 if (record__init_clock(rec))
2455 return -1;
2456
2457 record__init_features(rec);
2458
2459 if (forks) {
2460 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2461 workload_exec_failed_signal);
2462 if (err < 0) {
2463 pr_err("Couldn't run the workload!\n");
2464 status = err;
2465 goto out_delete_session;
2466 }
2467 }
2468
2469 /*
2470 * If we have just a single event and are sending data
2471 * through a pipe, we need to force the id allocation,
2472 * because we synthesize the event name through the pipe
2473 * and need the id for that.
2474 */
2475 if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2476 rec->opts.sample_id = true;
2477
2478 record__uniquify_name(rec);
2479
2480 /* Debug message used by test scripts */
2481 pr_debug3("perf record opening and mmapping events\n");
2482 if (record__open(rec) != 0) {
2483 err = -1;
2484 goto out_free_threads;
2485 }
2486 /* Debug message used by test scripts */
2487 pr_debug3("perf record done opening and mmapping events\n");
2488 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2489
2490 if (rec->opts.kcore) {
2491 err = record__kcore_copy(&session->machines.host, data);
2492 if (err) {
2493 pr_err("ERROR: Failed to copy kcore\n");
2494 goto out_free_threads;
2495 }
2496 }
2497
2498 /*
2499 * Normally perf_session__new would do this, but it doesn't have the
2500 * evlist.
2501 */
2502 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2503 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2504 rec->tool.ordered_events = false;
2505 }
2506
2507 if (evlist__nr_groups(rec->evlist) == 0)
2508 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2509
2510 if (data->is_pipe) {
2511 err = perf_header__write_pipe(fd);
2512 if (err < 0)
2513 goto out_free_threads;
2514 } else {
2515 err = perf_session__write_header(session, rec->evlist, fd, false);
2516 if (err < 0)
2517 goto out_free_threads;
2518 }
2519
2520 err = -1;
2521 if (!rec->no_buildid
2522 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2523 pr_err("Couldn't generate buildids. "
2524 "Use --no-buildid to profile anyway.\n");
2525 goto out_free_threads;
2526 }
2527
2528 err = record__setup_sb_evlist(rec);
2529 if (err)
2530 goto out_free_threads;
2531
2532 err = record__synthesize(rec, false);
2533 if (err < 0)
2534 goto out_free_threads;
2535
2536 if (rec->realtime_prio) {
2537 struct sched_param param;
2538
2539 param.sched_priority = rec->realtime_prio;
2540 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2541 pr_err("Could not set realtime priority.\n");
2542 err = -1;
2543 goto out_free_threads;
2544 }
2545 }
2546
2547 if (record__start_threads(rec))
2548 goto out_free_threads;
2549
2550 /*
2551 * When perf is starting the traced process, all the events
2552 * (apart from group members) have enable_on_exec=1 set,
2553 * so don't spoil it by prematurely enabling them.
2554 */
2555 if (!target__none(&opts->target) && !opts->target.initial_delay)
2556 evlist__enable(rec->evlist);
2557
2558 /*
2559 * Let the child rip
2560 */
2561 if (forks) {
2562 struct machine *machine = &session->machines.host;
2563 union perf_event *event;
2564 pid_t tgid;
2565
2566 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2567 if (event == NULL) {
2568 err = -ENOMEM;
2569 goto out_child;
2570 }
2571
2572 /*
2573 * Some H/W events are generated before the COMM event,
2574 * which is emitted during exec(), so perf script cannot
2575 * see the correct process name for those events.
2576 * Synthesize a COMM event to prevent that.
2577 */
2578 tgid = perf_event__synthesize_comm(tool, event,
2579 rec->evlist->workload.pid,
2580 process_synthesized_event,
2581 machine);
2582 free(event);
2583
2584 if (tgid == -1)
2585 goto out_child;
2586
2587 event = malloc(sizeof(event->namespaces) +
2588 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2589 machine->id_hdr_size);
2590 if (event == NULL) {
2591 err = -ENOMEM;
2592 goto out_child;
2593 }
2594
2595 /*
2596 * Synthesize NAMESPACES event for the command specified.
2597 */
2598 perf_event__synthesize_namespaces(tool, event,
2599 rec->evlist->workload.pid,
2600 tgid, process_synthesized_event,
2601 machine);
2602 free(event);
2603
2604 evlist__start_workload(rec->evlist);
2605 }
2606
2607 if (opts->target.initial_delay) {
2608 pr_info(EVLIST_DISABLED_MSG);
2609 if (opts->target.initial_delay > 0) {
2610 usleep(opts->target.initial_delay * USEC_PER_MSEC);
2611 evlist__enable(rec->evlist);
2612 pr_info(EVLIST_ENABLED_MSG);
2613 }
2614 }
2615
2616 err = event_enable_timer__start(rec->evlist->eet);
2617 if (err)
2618 goto out_child;
2619
2620 /* Debug message used by test scripts */
2621 pr_debug3("perf record has started\n");
2622 fflush(stderr);
2623
2624 trigger_ready(&auxtrace_snapshot_trigger);
2625 trigger_ready(&switch_output_trigger);
2626 perf_hooks__invoke_record_start();
2627
2628 /*
2629 * Must write FINISHED_INIT so it will be seen after all other
2630 * synthesized user events, but before any regular events.
2631 */
2632 err = write_finished_init(rec, false);
2633 if (err < 0)
2634 goto out_child;
2635
2636 for (;;) {
2637 unsigned long long hits = thread->samples;
2638
2639 /*
2640 * It is possible for rec->evlist->bkw_mmap_state to be
2641 * BKW_MMAP_EMPTY here: when done == true and
2642 * hits != rec->samples in the previous round.
2643 *
2644 * evlist__toggle_bkw_mmap ensures we never
2645 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2646 */
2647 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2648 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2649
2650 if (record__mmap_read_all(rec, false) < 0) {
2651 trigger_error(&auxtrace_snapshot_trigger);
2652 trigger_error(&switch_output_trigger);
2653 err = -1;
2654 goto out_child;
2655 }
2656
2657 if (auxtrace_record__snapshot_started) {
2658 auxtrace_record__snapshot_started = 0;
2659 if (!trigger_is_error(&auxtrace_snapshot_trigger))
2660 record__read_auxtrace_snapshot(rec, false);
2661 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2662 pr_err("AUX area tracing snapshot failed\n");
2663 err = -1;
2664 goto out_child;
2665 }
2666 }
2667
2668 if (trigger_is_hit(&switch_output_trigger)) {
2669 /*
2670 * If switch_output_trigger is hit, the data in
2671 * overwritable ring buffer should have been collected,
2672 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2673 *
2674 * If SIGUSR2 is raised after or during record__mmap_read_all(),
2675 * it did not collect data from the overwritable ring
2676 * buffer. Read again.
2677 */
2678 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2679 continue;
2680 trigger_ready(&switch_output_trigger);
2681
2682 /*
2683 * Reenable events in overwrite ring buffer after
2684 * record__mmap_read_all(): we should have collected
2685 * data from it.
2686 */
2687 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2688
2689 if (!quiet)
2690 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2691 record__waking(rec));
2692 thread->waking = 0;
2693 fd = record__switch_output(rec, false);
2694 if (fd < 0) {
2695 pr_err("Failed to switch to new file\n");
2696 trigger_error(&switch_output_trigger);
2697 err = fd;
2698 goto out_child;
2699 }
2700
2701 /* re-arm the alarm */
2702 if (rec->switch_output.time)
2703 alarm(rec->switch_output.time);
2704 }
2705
2706 if (hits == thread->samples) {
2707 if (done || draining)
2708 break;
2709 err = fdarray__poll(&thread->pollfd, -1);
2710 /*
2711 * Propagate error, only if there's any. Ignore positive
2712 * number of returned events and interrupt error.
2713 */
2714 if (err > 0 || (err < 0 && errno == EINTR))
2715 err = 0;
2716 thread->waking++;
2717
2718 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2719 record__thread_munmap_filtered, NULL) == 0)
2720 draining = true;
2721
2722 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2723 if (err)
2724 goto out_child;
2725 }
2726
2727 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2728 switch (cmd) {
2729 case EVLIST_CTL_CMD_SNAPSHOT:
2730 hit_auxtrace_snapshot_trigger(rec);
2731 evlist__ctlfd_ack(rec->evlist);
2732 break;
2733 case EVLIST_CTL_CMD_STOP:
2734 done = 1;
2735 break;
2736 case EVLIST_CTL_CMD_ACK:
2737 case EVLIST_CTL_CMD_UNSUPPORTED:
2738 case EVLIST_CTL_CMD_ENABLE:
2739 case EVLIST_CTL_CMD_DISABLE:
2740 case EVLIST_CTL_CMD_EVLIST:
2741 case EVLIST_CTL_CMD_PING:
2742 default:
2743 break;
2744 }
2745 }
2746
2747 err = event_enable_timer__process(rec->evlist->eet);
2748 if (err < 0)
2749 goto out_child;
2750 if (err) {
2751 err = 0;
2752 done = 1;
2753 }
2754
2755 /*
2756 * When perf is starting the traced process, at the end events
2757 * die with the process and we wait for that. Thus no need to
2758 * disable events in this case.
2759 */
2760 if (done && !disabled && !target__none(&opts->target)) {
2761 trigger_off(&auxtrace_snapshot_trigger);
2762 evlist__disable(rec->evlist);
2763 disabled = true;
2764 }
2765 }
2766
2767 trigger_off(&auxtrace_snapshot_trigger);
2768 trigger_off(&switch_output_trigger);
2769
2770 if (opts->auxtrace_snapshot_on_exit)
2771 record__auxtrace_snapshot_exit(rec);
2772
2773 if (forks && workload_exec_errno) {
2774 char msg[STRERR_BUFSIZE], strevsels[2048];
2775 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2776
2777 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2778
2779 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2780 strevsels, argv[0], emsg);
2781 err = -1;
2782 goto out_child;
2783 }
2784
2785 if (!quiet)
2786 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2787 record__waking(rec));
2788
2789 write_finished_init(rec, true);
2790
2791 if (target__none(&rec->opts.target))
2792 record__synthesize_workload(rec, true);
2793
2794out_child:
2795 record__stop_threads(rec);
2796 record__mmap_read_all(rec, true);
2797out_free_threads:
2798 record__free_thread_data(rec);
2799 evlist__finalize_ctlfd(rec->evlist);
2800 record__aio_mmap_read_sync(rec);
2801
2802 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2803 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2804 session->header.env.comp_ratio = ratio + 0.5;
2805 }
2806
2807 if (forks) {
2808 int exit_status;
2809
2810 if (!child_finished)
2811 kill(rec->evlist->workload.pid, SIGTERM);
2812
2813 wait(&exit_status);
2814
2815 if (err < 0)
2816 status = err;
2817 else if (WIFEXITED(exit_status))
2818 status = WEXITSTATUS(exit_status);
2819 else if (WIFSIGNALED(exit_status))
2820 signr = WTERMSIG(exit_status);
2821 } else
2822 status = err;
2823
2824 if (rec->off_cpu)
2825 rec->bytes_written += off_cpu_write(rec->session);
2826
2827 record__read_lost_samples(rec);
2828 record__synthesize(rec, true);
2829 /* this will be recalculated during process_buildids() */
2830 rec->samples = 0;
2831
2832 if (!err) {
2833 if (!rec->timestamp_filename) {
2834 record__finish_output(rec);
2835 } else {
2836 fd = record__switch_output(rec, true);
2837 if (fd < 0) {
2838 status = fd;
2839 goto out_delete_session;
2840 }
2841 }
2842 }
2843
2844 perf_hooks__invoke_record_end();
2845
2846 if (!err && !quiet) {
2847 char samples[128];
2848 const char *postfix = rec->timestamp_filename ?
2849 ".<timestamp>" : "";
2850
2851 if (rec->samples && !rec->opts.full_auxtrace)
2852 scnprintf(samples, sizeof(samples),
2853 " (%" PRIu64 " samples)", rec->samples);
2854 else
2855 samples[0] = '\0';
2856
2857 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2858 perf_data__size(data) / 1024.0 / 1024.0,
2859 data->path, postfix, samples);
2860 if (ratio) {
2861 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2862 rec->session->bytes_transferred / 1024.0 / 1024.0,
2863 ratio);
2864 }
2865 fprintf(stderr, " ]\n");
2866 }
2867
2868out_delete_session:
2869#ifdef HAVE_EVENTFD_SUPPORT
2870 if (done_fd >= 0) {
2871 fd = done_fd;
2872 done_fd = -1;
2873
2874 close(fd);
2875 }
2876#endif
2877 zstd_fini(&session->zstd_data);
2878 perf_session__delete(session);
2879
2880 if (!opts->no_bpf_event)
2881 evlist__stop_sb_thread(rec->sb_evlist);
2882 return status;
2883}
2884
2885static void callchain_debug(struct callchain_param *callchain)
2886{
2887 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2888
2889 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2890
2891 if (callchain->record_mode == CALLCHAIN_DWARF)
2892 pr_debug("callchain: stack dump size %d\n",
2893 callchain->dump_size);
2894}
2895
2896int record_opts__parse_callchain(struct record_opts *record,
2897 struct callchain_param *callchain,
2898 const char *arg, bool unset)
2899{
2900 int ret;
2901 callchain->enabled = !unset;
2902
2903 /* --no-call-graph */
2904 if (unset) {
2905 callchain->record_mode = CALLCHAIN_NONE;
2906 pr_debug("callchain: disabled\n");
2907 return 0;
2908 }
2909
2910 ret = parse_callchain_record_opt(arg, callchain);
2911 if (!ret) {
2912 /* Enable data address sampling for DWARF unwind. */
2913 if (callchain->record_mode == CALLCHAIN_DWARF)
2914 record->sample_address = true;
2915 callchain_debug(callchain);
2916 }
2917
2918 return ret;
2919}
2920
2921int record_parse_callchain_opt(const struct option *opt,
2922 const char *arg,
2923 int unset)
2924{
2925 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2926}
2927
2928int record_callchain_opt(const struct option *opt,
2929 const char *arg __maybe_unused,
2930 int unset __maybe_unused)
2931{
2932 struct callchain_param *callchain = opt->value;
2933
2934 callchain->enabled = true;
2935
2936 if (callchain->record_mode == CALLCHAIN_NONE)
2937 callchain->record_mode = CALLCHAIN_FP;
2938
2939 callchain_debug(callchain);
2940 return 0;
2941}
2942
2943static int perf_record_config(const char *var, const char *value, void *cb)
2944{
2945 struct record *rec = cb;
2946
2947 if (!strcmp(var, "record.build-id")) {
2948 if (!strcmp(value, "cache"))
2949 rec->no_buildid_cache = false;
2950 else if (!strcmp(value, "no-cache"))
2951 rec->no_buildid_cache = true;
2952 else if (!strcmp(value, "skip"))
2953 rec->no_buildid = true;
2954 else if (!strcmp(value, "mmap"))
2955 rec->buildid_mmap = true;
2956 else
2957 return -1;
2958 return 0;
2959 }
2960 if (!strcmp(var, "record.call-graph")) {
2961 var = "call-graph.record-mode";
2962 return perf_default_config(var, value, cb);
2963 }
2964#ifdef HAVE_AIO_SUPPORT
2965 if (!strcmp(var, "record.aio")) {
2966 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2967 if (!rec->opts.nr_cblocks)
2968 rec->opts.nr_cblocks = nr_cblocks_default;
2969 }
2970#endif
2971 if (!strcmp(var, "record.debuginfod")) {
2972 rec->debuginfod.urls = strdup(value);
2973 if (!rec->debuginfod.urls)
2974 return -ENOMEM;
2975 rec->debuginfod.set = true;
2976 }
2977
2978 return 0;
2979}
2980
2981static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
2982{
2983 struct record *rec = (struct record *)opt->value;
2984
2985 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
2986}
2987
2988static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2989{
2990 struct record_opts *opts = (struct record_opts *)opt->value;
2991
2992 if (unset || !str)
2993 return 0;
2994
2995 if (!strcasecmp(str, "node"))
2996 opts->affinity = PERF_AFFINITY_NODE;
2997 else if (!strcasecmp(str, "cpu"))
2998 opts->affinity = PERF_AFFINITY_CPU;
2999
3000 return 0;
3001}
3002
3003static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3004{
3005 mask->nbits = nr_bits;
3006 mask->bits = bitmap_zalloc(mask->nbits);
3007 if (!mask->bits)
3008 return -ENOMEM;
3009
3010 return 0;
3011}
3012
3013static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3014{
3015 bitmap_free(mask->bits);
3016 mask->nbits = 0;
3017}
3018
3019static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3020{
3021 int ret;
3022
3023 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3024 if (ret) {
3025 mask->affinity.bits = NULL;
3026 return ret;
3027 }
3028
3029 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3030 if (ret) {
3031 record__mmap_cpu_mask_free(&mask->maps);
3032 mask->maps.bits = NULL;
3033 }
3034
3035 return ret;
3036}
3037
3038static void record__thread_mask_free(struct thread_mask *mask)
3039{
3040 record__mmap_cpu_mask_free(&mask->maps);
3041 record__mmap_cpu_mask_free(&mask->affinity);
3042}
3043
3044static int record__parse_threads(const struct option *opt, const char *str, int unset)
3045{
3046 int s;
3047 struct record_opts *opts = opt->value;
3048
3049 if (unset || !str || !strlen(str)) {
3050 opts->threads_spec = THREAD_SPEC__CPU;
3051 } else {
3052 for (s = 1; s < THREAD_SPEC__MAX; s++) {
3053 if (s == THREAD_SPEC__USER) {
3054 opts->threads_user_spec = strdup(str);
3055 if (!opts->threads_user_spec)
3056 return -ENOMEM;
3057 opts->threads_spec = THREAD_SPEC__USER;
3058 break;
3059 }
3060 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3061 opts->threads_spec = s;
3062 break;
3063 }
3064 }
3065 }
3066
3067 if (opts->threads_spec == THREAD_SPEC__USER)
3068 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3069 else
3070 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3071
3072 return 0;
3073}
3074
3075static int parse_output_max_size(const struct option *opt,
3076 const char *str, int unset)
3077{
3078 unsigned long *s = (unsigned long *)opt->value;
3079 static struct parse_tag tags_size[] = {
3080 { .tag = 'B', .mult = 1 },
3081 { .tag = 'K', .mult = 1 << 10 },
3082 { .tag = 'M', .mult = 1 << 20 },
3083 { .tag = 'G', .mult = 1 << 30 },
3084 { .tag = 0 },
3085 };
3086 unsigned long val;
3087
3088 if (unset) {
3089 *s = 0;
3090 return 0;
3091 }
3092
3093 val = parse_tag_value(str, tags_size);
3094 if (val != (unsigned long) -1) {
3095 *s = val;
3096 return 0;
3097 }
3098
3099 return -1;
3100}
3101
3102static int record__parse_mmap_pages(const struct option *opt,
3103 const char *str,
3104 int unset __maybe_unused)
3105{
3106 struct record_opts *opts = opt->value;
3107 char *s, *p;
3108 unsigned int mmap_pages;
3109 int ret;
3110
3111 if (!str)
3112 return -EINVAL;
3113
3114 s = strdup(str);
3115 if (!s)
3116 return -ENOMEM;
3117
3118 p = strchr(s, ',');
3119 if (p)
3120 *p = '\0';
3121
3122 if (*s) {
3123 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3124 if (ret)
3125 goto out_free;
3126 opts->mmap_pages = mmap_pages;
3127 }
3128
3129 if (!p) {
3130 ret = 0;
3131 goto out_free;
3132 }
3133
3134 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3135 if (ret)
3136 goto out_free;
3137
3138 opts->auxtrace_mmap_pages = mmap_pages;
3139
3140out_free:
3141 free(s);
3142 return ret;
3143}
3144
3145void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3146{
3147}
3148
3149static int parse_control_option(const struct option *opt,
3150 const char *str,
3151 int unset __maybe_unused)
3152{
3153 struct record_opts *opts = opt->value;
3154
3155 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3156}
3157
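/*
 * Warn when the --switch-output size threshold is smaller than half the mmap
 * wakeup size: output files may end up noticeably bigger than requested.
 */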
3158static void switch_output_size_warn(struct record *rec)
3159{
3160 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3161 struct switch_output *s = &rec->switch_output;
3162
3163 wakeup_size /= 2;
3164
3165 if (s->size < wakeup_size) {
3166 char buf[100];
3167
3168 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3169 pr_warning("WARNING: switch-output data size lower than "
3170 "wakeup kernel buffer size (%s); "
3171 "expect bigger perf.data sizes\n", buf);
3172 }
3173}
3174
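/*
 * Parse the --switch-output argument: "signal", a size (B/K/M/G) or a time
 * (s/m/h/d) threshold. Not available in parallel streaming mode; enabling it
 * implies timestamped output file names.
 */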
3175static int switch_output_setup(struct record *rec)
3176{
3177 struct switch_output *s = &rec->switch_output;
3178 static struct parse_tag tags_size[] = {
3179 { .tag = 'B', .mult = 1 },
3180 { .tag = 'K', .mult = 1 << 10 },
3181 { .tag = 'M', .mult = 1 << 20 },
3182 { .tag = 'G', .mult = 1 << 30 },
3183 { .tag = 0 },
3184 };
3185 static struct parse_tag tags_time[] = {
3186 { .tag = 's', .mult = 1 },
3187 { .tag = 'm', .mult = 60 },
3188 { .tag = 'h', .mult = 60*60 },
3189 { .tag = 'd', .mult = 60*60*24 },
3190 { .tag = 0 },
3191 };
3192 unsigned long val;
3193
3194 /*
3195 * If we're using --switch-output-event, then we imply
3196 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3197 * thread to its parent.
3198 */
3199 if (rec->switch_output_event_set) {
3200 if (record__threads_enabled(rec)) {
3201 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3202 return 0;
3203 }
3204 goto do_signal;
3205 }
3206
3207 if (!s->set)
3208 return 0;
3209
3210 if (record__threads_enabled(rec)) {
3211 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3212 return 0;
3213 }
3214
3215 if (!strcmp(s->str, "signal")) {
3216do_signal:
3217 s->signal = true;
3218 pr_debug("switch-output with SIGUSR2 signal\n");
3219 goto enabled;
3220 }
3221
3222 val = parse_tag_value(s->str, tags_size);
3223 if (val != (unsigned long) -1) {
3224 s->size = val;
3225 pr_debug("switch-output with %s size threshold\n", s->str);
3226 goto enabled;
3227 }
3228
3229 val = parse_tag_value(s->str, tags_time);
3230 if (val != (unsigned long) -1) {
3231 s->time = val;
3232 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3233 s->str, s->time);
3234 goto enabled;
3235 }
3236
3237 return -1;
3238
3239enabled:
3240 rec->timestamp_filename = true;
3241 s->enabled = true;
3242
3243 if (s->size && !rec->opts.no_buffering)
3244 switch_output_size_warn(rec);
3245
3246 return 0;
3247}
3248
3249static const char * const __record_usage[] = {
3250 "perf record [<options>] [<command>]",
3251 "perf record [<options>] -- <command> [<options>]",
3252 NULL
3253};
3254const char * const *record_usage = __record_usage;
3255
3256static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3257 struct perf_sample *sample, struct machine *machine)
3258{
3259 /*
3260 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3261 * so there is no need to add them twice.
3262 */
3263 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3264 return 0;
3265 return perf_event__process_mmap(tool, event, sample, machine);
3266}
3267
3268static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3269 struct perf_sample *sample, struct machine *machine)
3270{
3271 /*
3272 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3273 * so there is no need to add them twice.
3274 */
3275 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3276 return 0;
3277
3278 return perf_event__process_mmap2(tool, event, sample, machine);
3279}
3280
3281static int process_timestamp_boundary(struct perf_tool *tool,
3282 union perf_event *event __maybe_unused,
3283 struct perf_sample *sample,
3284 struct machine *machine __maybe_unused)
3285{
3286 struct record *rec = container_of(tool, struct record, tool);
3287
3288 set_timestamp_boundary(rec, sample->time);
3289 return 0;
3290}
3291
3292static int parse_record_synth_option(const struct option *opt,
3293 const char *str,
3294 int unset __maybe_unused)
3295{
3296 struct record_opts *opts = opt->value;
3297 char *p = strdup(str);
3298
3299 if (p == NULL)
3300 return -1;
3301
3302 opts->synth = parse_synth_opt(p);
3303 free(p);
3304
3305 if (opts->synth < 0) {
3306 pr_err("Invalid synth option: %s\n", str);
3307 return -1;
3308 }
3309 return 0;
3310}
3311
3312/*
3313 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
3314 * because we need to have access to it in record__exit(), which is called
3315 * after cmd_record() exits, but since record_options needs to be accessible to
3316 * builtin-script, leave it here.
3317 *
3318 * At least we don't touch it in all the other functions here directly.
3319 *
3320 * Just say no to tons of global variables, sigh.
3321 */
3322static struct record record = {
3323 .opts = {
3324 .sample_time = true,
3325 .mmap_pages = UINT_MAX,
3326 .user_freq = UINT_MAX,
3327 .user_interval = ULLONG_MAX,
3328 .freq = 4000,
3329 .target = {
3330 .uses_mmap = true,
3331 .default_per_cpu = true,
3332 },
3333 .mmap_flush = MMAP_FLUSH_DEFAULT,
3334 .nr_threads_synthesize = 1,
3335 .ctl_fd = -1,
3336 .ctl_fd_ack = -1,
3337 .synth = PERF_SYNTH_ALL,
3338 },
3339 .tool = {
3340 .sample = process_sample_event,
3341 .fork = perf_event__process_fork,
3342 .exit = perf_event__process_exit,
3343 .comm = perf_event__process_comm,
3344 .namespaces = perf_event__process_namespaces,
3345 .mmap = build_id__process_mmap,
3346 .mmap2 = build_id__process_mmap2,
3347 .itrace_start = process_timestamp_boundary,
3348 .aux = process_timestamp_boundary,
3349 .ordered_events = true,
3350 },
3351};
3352
3353const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3354 "\n\t\t\t\tDefault: fp";
3355
3356static bool dry_run;
3357
3358static struct parse_events_option_args parse_events_option_args = {
3359 .evlistp = &record.evlist,
3360};
3361
3362static struct parse_events_option_args switch_output_parse_events_option_args = {
3363 .evlistp = &record.sb_evlist,
3364};
3365
3366/*
3367 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3368 * with it and switch to use the library functions in perf_evlist that came
3369 * from builtin-record.c, i.e. use record_opts,
3370 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3371 * using pipes, etc.
3372 */
3373static struct option __record_options[] = {
3374 OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3375 "event selector. use 'perf list' to list available events",
3376 parse_events_option),
3377 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3378 "event filter", parse_filter),
3379 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3380 NULL, "don't record events from perf itself",
3381 exclude_perf),
3382 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3383 "record events on existing process id"),
3384 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3385 "record events on existing thread id"),
3386 OPT_INTEGER('r', "realtime", &record.realtime_prio,
3387 "collect data with this RT SCHED_FIFO priority"),
3388 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3389 "collect data without buffering"),
3390 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3391 "collect raw sample records from all opened counters"),
3392 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3393 "system-wide collection from all CPUs"),
3394 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3395 "list of cpus to monitor"),
3396 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3397 OPT_STRING('o', "output", &record.data.path, "file",
3398 "output file name"),
3399 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3400 &record.opts.no_inherit_set,
3401 "child tasks do not inherit counters"),
3402 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3403 "synthesize non-sample events at the end of output"),
3404 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3405 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3406 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3407 "Fail if the specified frequency can't be used"),
3408 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3409 "profile at this frequency",
3410 record__parse_freq),
3411 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3412 "number of mmap data pages and AUX area tracing mmap pages",
3413 record__parse_mmap_pages),
3414 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3415 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3416 record__mmap_flush_parse),
3417 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3418 NULL, "enables call-graph recording" ,
3419 &record_callchain_opt),
3420 OPT_CALLBACK(0, "call-graph", &record.opts,
3421 "record_mode[,record_size]", record_callchain_help,
3422 &record_parse_callchain_opt),
3423 OPT_INCR('v', "verbose", &verbose,
3424 "be more verbose (show counter open errors, etc)"),
3425 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3426 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3427 "per thread counts"),
3428 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3429 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3430 "Record the sample physical addresses"),
3431 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3432 "Record the sampled data address data page size"),
3433 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3434 "Record the sampled code address (ip) page size"),
3435 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3436 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3437 "Record the sample identifier"),
3438 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3439 &record.opts.sample_time_set,
3440 "Record the sample timestamps"),
3441 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3442 "Record the sample period"),
3443 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3444 "don't sample"),
3445 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3446 &record.no_buildid_cache_set,
3447 "do not update the buildid cache"),
3448 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3449 &record.no_buildid_set,
3450 "do not collect buildids in perf.data"),
3451 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3452 "monitor event in cgroup name only",
3453 parse_cgroups),
3454 OPT_CALLBACK('D', "delay", &record, "ms",
3455 "ms to wait before starting measurement after program start (-1: start with events disabled), "
3456 "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3457 record__parse_event_enable_time),
3458 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3459 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3460 "user to profile"),
3461
3462 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3463 "branch any", "sample any taken branches",
3464 parse_branch_stack),
3465
3466 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3467 "branch filter mask", "branch stack filter modes",
3468 parse_branch_stack),
3469 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3470 "sample by weight (on special events only)"),
3471 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3472 "sample transaction flags (special events only)"),
3473 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3474 "use per-thread mmaps"),
3475 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3476 "sample selected machine registers on interrupt,"
3477 " use '-I?' to list register names", parse_intr_regs),
3478 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3479 "sample selected machine registers on interrupt,"
3480 " use '--user-regs=?' to list register names", parse_user_regs),
3481 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3482 "Record running/enabled time of read (:S) events"),
3483 OPT_CALLBACK('k', "clockid", &record.opts,
3484 "clockid", "clockid to use for events, see clock_gettime()",
3485 parse_clockid),
3486 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3487 "opts", "AUX area tracing Snapshot Mode", ""),
3488 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3489 "opts", "sample AUX area", ""),
3490 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3491 "per thread proc mmap processing timeout in ms"),
3492 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3493 "Record namespaces events"),
3494 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3495 "Record cgroup events"),
3496 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3497 &record.opts.record_switch_events_set,
3498 "Record context switch events"),
3499 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3500 "Configure all used events to run in kernel space.",
3501 PARSE_OPT_EXCLUSIVE),
3502 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3503 "Configure all used events to run in user space.",
3504 PARSE_OPT_EXCLUSIVE),
3505 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3506 "collect kernel callchains"),
3507 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3508 "collect user callchains"),
3509 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3510 "file", "vmlinux pathname"),
3511 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3512 "Record build-id of all DSOs regardless of hits"),
3513 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3514 "Record build-id in map events"),
3515 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3516 "append timestamp to output filename"),
3517 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3518 "Record timestamp boundary (time of first/last samples)"),
3519 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3520 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3521 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3522 "signal"),
3523 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3524 &record.switch_output_event_set, "switch output event",
3525 "switch output event selector. use 'perf list' to list available events",
3526 parse_events_option_new_evlist),
3527 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3528 "Limit number of switch output generated files"),
3529 OPT_BOOLEAN(0, "dry-run", &dry_run,
3530 "Parse options then exit"),
3531#ifdef HAVE_AIO_SUPPORT
3532 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3533 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3534 record__aio_parse),
3535#endif
3536 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3537 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3538 record__parse_affinity),
3539#ifdef HAVE_ZSTD_SUPPORT
3540 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3541 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3542 record__parse_comp_level),
3543#endif
3544 OPT_CALLBACK(0, "max-size", &record.output_max_size,
3545 "size", "Limit the maximum size of the output file", parse_output_max_size),
3546 OPT_UINTEGER(0, "num-thread-synthesize",
3547 &record.opts.nr_threads_synthesize,
3548 "number of threads to run for event synthesis"),
3549#ifdef HAVE_LIBPFM
3550 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3551 "libpfm4 event selector. use 'perf list' to list available events",
3552 parse_libpfm_events_option),
3553#endif
3554 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3555 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3556 "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3557 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3558 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3559 parse_control_option),
3560 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3561 "Fine-tune event synthesis: default=all", parse_record_synth_option),
3562 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3563 &record.debuginfod.set, "debuginfod urls",
3564 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3565 "system"),
3566 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3567 "write collected trace data into several data files using parallel threads",
3568 record__parse_threads),
3569 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3570 OPT_END()
3571};
3572
3573struct option *record_options = __record_options;
3574
3575static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3576{
3577 struct perf_cpu cpu;
3578 int idx;
3579
3580 if (cpu_map__is_dummy(cpus))
3581 return 0;
3582
3583 perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
3584 if (cpu.cpu == -1)
3585 continue;
3586 /* Return -ENODEV if the input cpu is greater than max cpu */
3587 if ((unsigned long)cpu.cpu > mask->nbits)
3588 return -ENODEV;
3589 __set_bit(cpu.cpu, mask->bits);
3590 }
3591
3592 return 0;
3593}
3594
3595static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3596{
3597 struct perf_cpu_map *cpus;
3598
3599 cpus = perf_cpu_map__new(mask_spec);
3600 if (!cpus)
3601 return -ENOMEM;
3602
3603 bitmap_zero(mask->bits, mask->nbits);
3604 if (record__mmap_cpu_mask_init(mask, cpus))
3605 return -ENODEV;
3606
3607 perf_cpu_map__put(cpus);
3608
3609 return 0;
3610}
3611
3612static void record__free_thread_masks(struct record *rec, int nr_threads)
3613{
3614 int t;
3615
3616 if (rec->thread_masks)
3617 for (t = 0; t < nr_threads; t++)
3618 record__thread_mask_free(&rec->thread_masks[t]);
3619
3620 zfree(&rec->thread_masks);
3621}
3622
3623static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3624{
3625 int t, ret;
3626
3627 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3628 if (!rec->thread_masks) {
3629 pr_err("Failed to allocate thread masks\n");
3630 return -ENOMEM;
3631 }
3632
3633 for (t = 0; t < nr_threads; t++) {
3634 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3635 if (ret) {
3636 pr_err("Failed to allocate thread masks[%d]\n", t);
3637 goto out_free;
3638 }
3639 }
3640
3641 return 0;
3642
3643out_free:
3644 record__free_thread_masks(rec, nr_threads);
3645
3646 return ret;
3647}
3648
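/* One reader thread per monitored CPU, pinned to that same CPU. */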
3649static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3650{
3651 int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3652
3653 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3654 if (ret)
3655 return ret;
3656
3657 rec->nr_threads = nr_cpus;
3658 pr_debug("nr_threads: %d\n", rec->nr_threads);
3659
3660 for (t = 0; t < rec->nr_threads; t++) {
3661 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3662 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3663 if (verbose > 0) {
3664 pr_debug("thread_masks[%d]: ", t);
3665 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3666 pr_debug("thread_masks[%d]: ", t);
3667 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3668 }
3669 }
3670
3671 return 0;
3672}
3673
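/*
 * Build one thread mask per (maps_spec, affinity_spec) pair. Each mask is
 * clipped to the CPUs being recorded; masks that end up empty or that overlap
 * a previously accepted mask are rejected.
 */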
3674static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3675 const char **maps_spec, const char **affinity_spec,
3676 u32 nr_spec)
3677{
3678 u32 s;
3679 int ret = 0, t = 0;
3680 struct mmap_cpu_mask cpus_mask;
3681 struct thread_mask thread_mask, full_mask, *thread_masks;
3682
3683 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3684 if (ret) {
3685 pr_err("Failed to allocate CPUs mask\n");
3686 return ret;
3687 }
3688
3689 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3690 if (ret) {
3691 pr_err("Failed to init cpu mask\n");
3692 goto out_free_cpu_mask;
3693 }
3694
3695 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3696 if (ret) {
3697 pr_err("Failed to allocate full mask\n");
3698 goto out_free_cpu_mask;
3699 }
3700
3701 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3702 if (ret) {
3703 pr_err("Failed to allocate thread mask\n");
3704 goto out_free_full_and_cpu_masks;
3705 }
3706
3707 for (s = 0; s < nr_spec; s++) {
3708 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3709 if (ret) {
3710 pr_err("Failed to initialize maps thread mask\n");
3711 goto out_free;
3712 }
3713 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3714 if (ret) {
3715 pr_err("Failed to initialize affinity thread mask\n");
3716 goto out_free;
3717 }
3718
3719 /* ignore invalid CPUs but do not allow empty masks */
3720 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3721 cpus_mask.bits, thread_mask.maps.nbits)) {
3722 pr_err("Empty maps mask: %s\n", maps_spec[s]);
3723 ret = -EINVAL;
3724 goto out_free;
3725 }
3726 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3727 cpus_mask.bits, thread_mask.affinity.nbits)) {
3728 pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3729 ret = -EINVAL;
3730 goto out_free;
3731 }
3732
3733 /* do not allow intersection with other masks (full_mask) */
3734 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3735 thread_mask.maps.nbits)) {
3736 pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3737 ret = -EINVAL;
3738 goto out_free;
3739 }
3740 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3741 thread_mask.affinity.nbits)) {
3742 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3743 ret = -EINVAL;
3744 goto out_free;
3745 }
3746
3747 bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3748 thread_mask.maps.bits, full_mask.maps.nbits);
3749 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3750 thread_mask.affinity.bits, full_mask.maps.nbits);
3751
3752 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3753 if (!thread_masks) {
3754 pr_err("Failed to reallocate thread masks\n");
3755 ret = -ENOMEM;
3756 goto out_free;
3757 }
3758 rec->thread_masks = thread_masks;
3759 rec->thread_masks[t] = thread_mask;
3760 if (verbose > 0) {
3761 pr_debug("thread_masks[%d]: ", t);
3762 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3763 pr_debug("thread_masks[%d]: ", t);
3764 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3765 }
3766 t++;
3767 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3768 if (ret) {
3769 pr_err("Failed to allocate thread mask\n");
3770 goto out_free_full_and_cpu_masks;
3771 }
3772 }
3773 rec->nr_threads = t;
3774 pr_debug("nr_threads: %d\n", rec->nr_threads);
3775 if (!rec->nr_threads)
3776 ret = -EINVAL;
3777
3778out_free:
3779 record__thread_mask_free(&thread_mask);
3780out_free_full_and_cpu_masks:
3781 record__thread_mask_free(&full_mask);
3782out_free_cpu_mask:
3783 record__mmap_cpu_mask_free(&cpus_mask);
3784
3785 return ret;
3786}
3787
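/*
 * One data streaming thread per core: reuse the core CPU lists from the
 * CPU topology as both the maps and the affinity specs.
 */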
3788static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3789{
3790 int ret;
3791 struct cpu_topology *topo;
3792
3793 topo = cpu_topology__new();
3794 if (!topo) {
3795 pr_err("Failed to allocate CPU topology\n");
3796 return -ENOMEM;
3797 }
3798
3799 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3800 topo->core_cpus_list, topo->core_cpus_lists);
3801 cpu_topology__delete(topo);
3802
3803 return ret;
3804}
3805
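/* Same as above, but with one data streaming thread per processor package. */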
3806static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3807{
3808 int ret;
3809 struct cpu_topology *topo;
3810
3811 topo = cpu_topology__new();
3812 if (!topo) {
3813 pr_err("Failed to allocate CPU topology\n");
3814 return -ENOMEM;
3815 }
3816
3817 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3818 topo->package_cpus_list, topo->package_cpus_lists);
3819 cpu_topology__delete(topo);
3820
3821 return ret;
3822}
3823
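/*
 * One data streaming thread per NUMA node: build a temporary spec array from
 * the per-node CPU lists and hand it to record__init_thread_masks_spec().
 */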
3824static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3825{
3826 u32 s;
3827 int ret;
3828 const char **spec;
3829 struct numa_topology *topo;
3830
3831 topo = numa_topology__new();
3832 if (!topo) {
3833 pr_err("Failed to allocate NUMA topology\n");
3834 return -ENOMEM;
3835 }
3836
3837 spec = zalloc(topo->nr * sizeof(char *));
3838 if (!spec) {
3839 pr_err("Failed to allocate NUMA spec\n");
3840 ret = -ENOMEM;
3841 goto out_delete_topo;
3842 }
3843 for (s = 0; s < topo->nr; s++)
3844 spec[s] = topo->nodes[s].cpus;
3845
3846 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3847
3848 zfree(&spec);
3849
3850out_delete_topo:
3851 numa_topology__delete(topo);
3852
3853 return ret;
3854}
3855
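/*
 * Parse the user supplied --threads spec: entries are separated by ':' and
 * each entry is "<maps cpus>/<affinity cpus>". For example (illustrative),
 * a spec like "0-3/3:4-7/7" would request two threads reading the mmaps of
 * CPUs 0-3 and 4-7 while pinned to CPUs 3 and 7 respectively.
 */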
3856static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3857{
3858 int t, ret;
3859 u32 s, nr_spec = 0;
3860 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3861 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3862
3863 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3864 spec = strtok_r(user_spec, ":", &spec_ptr);
3865 if (spec == NULL)
3866 break;
3867 pr_debug2("threads_spec[%d]: %s\n", t, spec);
3868 mask = strtok_r(spec, "/", &mask_ptr);
3869 if (mask == NULL)
3870 break;
3871 pr_debug2(" maps mask: %s\n", mask);
3872 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3873 if (!tmp_spec) {
3874 pr_err("Failed to reallocate maps spec\n");
3875 ret = -ENOMEM;
3876 goto out_free;
3877 }
3878 maps_spec = tmp_spec;
3879 maps_spec[nr_spec] = dup_mask = strdup(mask);
3880 if (!maps_spec[nr_spec]) {
3881 pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3882 ret = -ENOMEM;
3883 goto out_free;
3884 }
3885 mask = strtok_r(NULL, "/", &mask_ptr);
3886 if (mask == NULL) {
3887 pr_err("Invalid thread maps or affinity specs\n");
3888 ret = -EINVAL;
3889 goto out_free;
3890 }
3891 pr_debug2(" affinity mask: %s\n", mask);
3892 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3893 if (!tmp_spec) {
3894 pr_err("Failed to reallocate affinity spec\n");
3895 ret = -ENOMEM;
3896 goto out_free;
3897 }
3898 affinity_spec = tmp_spec;
3899 affinity_spec[nr_spec] = strdup(mask);
3900 if (!affinity_spec[nr_spec]) {
3901 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3902 ret = -ENOMEM;
3903 goto out_free;
3904 }
3905 dup_mask = NULL;
3906 nr_spec++;
3907 }
3908
3909 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3910 (const char **)affinity_spec, nr_spec);
3911
3912out_free:
3913 free(dup_mask);
3914 for (s = 0; s < nr_spec; s++) {
3915 if (maps_spec)
3916 free(maps_spec[s]);
3917 if (affinity_spec)
3918 free(affinity_spec[s]);
3919 }
3920 free(affinity_spec);
3921 free(maps_spec);
3922
3923 return ret;
3924}
3925
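/*
 * Default (non-parallel) mode: a single data streaming thread whose maps
 * mask covers all CPUs of the evlist; the affinity mask is left as
 * allocated (all zeroes), i.e. the thread is not pinned anywhere.
 */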
3926static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3927{
3928 int ret;
3929
3930 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3931 if (ret)
3932 return ret;
3933
3934 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
3935 return -ENODEV;
3936
3937 rec->nr_threads = 1;
3938
3939 return 0;
3940}
3941
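/*
 * Dispatch on the requested --threads spec (cpu, core, package, numa or a
 * user defined list). Without parallel streaming a single default thread
 * mask is used, and --per-thread evlists are rejected as incompatible.
 */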
3942static int record__init_thread_masks(struct record *rec)
3943{
3944 int ret = 0;
3945 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3946
3947 if (!record__threads_enabled(rec))
3948 return record__init_thread_default_masks(rec, cpus);
3949
3950 if (evlist__per_thread(rec->evlist)) {
3951 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3952 return -EINVAL;
3953 }
3954
3955 switch (rec->opts.threads_spec) {
3956 case THREAD_SPEC__CPU:
3957 ret = record__init_thread_cpu_masks(rec, cpus);
3958 break;
3959 case THREAD_SPEC__CORE:
3960 ret = record__init_thread_core_masks(rec, cpus);
3961 break;
3962 case THREAD_SPEC__PACKAGE:
3963 ret = record__init_thread_package_masks(rec, cpus);
3964 break;
3965 case THREAD_SPEC__NUMA:
3966 ret = record__init_thread_numa_masks(rec, cpus);
3967 break;
3968 case THREAD_SPEC__USER:
3969 ret = record__init_thread_user_masks(rec, cpus);
3970 break;
3971 default:
3972 break;
3973 }
3974
3975 return ret;
3976}
3977
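/*
 * Entry point of 'perf record': parse options, validate the target and the
 * event list, configure auxtrace/off-cpu/text-poke recording and the data
 * streaming thread masks, then hand over to __cmd_record().
 */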
3978int cmd_record(int argc, const char **argv)
3979{
3980 int err;
3981 struct record *rec = &record;
3982 char errbuf[BUFSIZ];
3983
3984 setlocale(LC_ALL, "");
3985
3986#ifndef HAVE_BPF_SKEL
3987# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3988 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3989# undef set_nobuild
3990#endif
3991
3992 rec->opts.affinity = PERF_AFFINITY_SYS;
3993
3994 rec->evlist = evlist__new();
3995 if (rec->evlist == NULL)
3996 return -ENOMEM;
3997
3998 err = perf_config(perf_record_config, rec);
3999 if (err)
4000 return err;
4001
4002 argc = parse_options(argc, argv, record_options, record_usage,
4003 PARSE_OPT_STOP_AT_NON_OPTION);
4004 if (quiet)
4005 perf_quiet_option();
4006
4007 err = symbol__validate_sym_arguments();
4008 if (err)
4009 return err;
4010
4011 perf_debuginfod_setup(&record.debuginfod);
4012
4013 /* Make system wide (-a) the default target. */
4014 if (!argc && target__none(&rec->opts.target))
4015 rec->opts.target.system_wide = true;
4016
4017 if (nr_cgroups && !rec->opts.target.system_wide) {
4018 usage_with_options_msg(record_usage, record_options,
4019 "cgroup monitoring only available in system-wide mode");
4020
4021 }
4022
4023 if (rec->buildid_mmap) {
4024 if (!perf_can_record_build_id()) {
4025 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
4026 err = -EINVAL;
4027 goto out_opts;
4028 }
4029 pr_debug("Enabling build id in mmap2 events.\n");
4030 /* Enable mmap build id synthesizing. */
4031 symbol_conf.buildid_mmap2 = true;
4032 /* Enable perf_event_attr::build_id bit. */
4033 rec->opts.build_id = true;
4034 /* Disable build id cache. */
4035 rec->no_buildid = true;
4036 }
4037
4038 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4039 pr_err("Kernel has no cgroup sampling support.\n");
4040 err = -EINVAL;
4041 goto out_opts;
4042 }
4043
4044 if (rec->opts.kcore)
4045 rec->opts.text_poke = true;
4046
4047 if (rec->opts.kcore || record__threads_enabled(rec))
4048 rec->data.is_dir = true;
4049
4050 if (record__threads_enabled(rec)) {
4051 if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4052 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4053 goto out_opts;
4054 }
4055 if (record__aio_enabled(rec)) {
4056 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4057 goto out_opts;
4058 }
4059 }
4060
4061 if (rec->opts.comp_level != 0) {
4062 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4063 rec->no_buildid = true;
4064 }
4065
4066 if (rec->opts.record_switch_events &&
4067 !perf_can_record_switch_events()) {
4068 ui__error("kernel does not support recording context switch events\n");
4069 parse_options_usage(record_usage, record_options, "switch-events", 0);
4070 err = -EINVAL;
4071 goto out_opts;
4072 }
4073
4074 if (switch_output_setup(rec)) {
4075 parse_options_usage(record_usage, record_options, "switch-output", 0);
4076 err = -EINVAL;
4077 goto out_opts;
4078 }
4079
4080 if (rec->switch_output.time) {
4081 signal(SIGALRM, alarm_sig_handler);
4082 alarm(rec->switch_output.time);
4083 }
4084
4085 if (rec->switch_output.num_files) {
4086 rec->switch_output.filenames = calloc(sizeof(char *),
4087 rec->switch_output.num_files);
4088 if (!rec->switch_output.filenames) {
4089 err = -EINVAL;
4090 goto out_opts;
4091 }
4092 }
4093
4094 if (rec->timestamp_filename && record__threads_enabled(rec)) {
4095 rec->timestamp_filename = false;
4096 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4097 }
4098
4099 /*
4100 * Allow aliases to facilitate the lookup of symbols for address
4101 * filters. Refer to auxtrace_parse_filters().
4102 */
4103 symbol_conf.allow_aliases = true;
4104
4105 symbol__init(NULL);
4106
4107 err = record__auxtrace_init(rec);
4108 if (err)
4109 goto out;
4110
4111 if (dry_run)
4112 goto out;
4113
4114 err = -ENOMEM;
4115
4116 if (rec->no_buildid_cache || rec->no_buildid) {
4117 disable_buildid_cache();
4118 } else if (rec->switch_output.enabled) {
4119 /*
4120 * In 'perf record --switch-output', disable buildid
4121 * generation by default to reduce data file switching
4122 * overhead. Still generate buildids if they are explicitly
4123 * required using
4124 *
4125 * perf record --switch-output --no-no-buildid \
4126 * --no-no-buildid-cache
4127 *
4128 * The following code is equivalent to:
4129 *
4130 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4131 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4132 * disable_buildid_cache();
4133 */
4134 bool disable = true;
4135
4136 if (rec->no_buildid_set && !rec->no_buildid)
4137 disable = false;
4138 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4139 disable = false;
4140 if (disable) {
4141 rec->no_buildid = true;
4142 rec->no_buildid_cache = true;
4143 disable_buildid_cache();
4144 }
4145 }
4146
4147 if (record.opts.overwrite)
4148 record.opts.tail_synthesize = true;
4149
4150 if (rec->evlist->core.nr_entries == 0) {
4151 bool can_profile_kernel = perf_event_paranoid_check(1);
4152
4153 err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
4154 if (err)
4155 goto out;
4156 }
4157
4158 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4159 rec->opts.no_inherit = true;
4160
4161 err = target__validate(&rec->opts.target);
4162 if (err) {
4163 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4164 ui__warning("%s\n", errbuf);
4165 }
4166
4167 err = target__parse_uid(&rec->opts.target);
4168 if (err) {
4169 int saved_errno = errno;
4170
4171 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4172 ui__error("%s", errbuf);
4173
4174 err = -saved_errno;
4175 goto out;
4176 }
4177
4178 /* Enable ignoring missing threads when -u/-p option is defined. */
4179 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
4180
4181 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4182
4183 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4184 arch__add_leaf_frame_record_opts(&rec->opts);
4185
4186 err = -ENOMEM;
4187 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4188 if (rec->opts.target.pid != NULL) {
4189 pr_err("Couldn't create thread/CPU maps: %s\n",
4190 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4191 goto out;
4192 }
4193 else
4194 usage_with_options(record_usage, record_options);
4195 }
4196
4197 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4198 if (err)
4199 goto out;
4200
4201 /*
4202 * We take all buildids when the file contains
4203 * AUX area tracing data because we do not decode the
4204 * trace, as doing so would take too long.
4205 */
4206 if (rec->opts.full_auxtrace)
4207 rec->buildid_all = true;
4208
4209 if (rec->opts.text_poke) {
4210 err = record__config_text_poke(rec->evlist);
4211 if (err) {
4212 pr_err("record__config_text_poke failed, error %d\n", err);
4213 goto out;
4214 }
4215 }
4216
4217 if (rec->off_cpu) {
4218 err = record__config_off_cpu(rec);
4219 if (err) {
4220 pr_err("record__config_off_cpu failed, error %d\n", err);
4221 goto out;
4222 }
4223 }
4224
4225 if (record_opts__config(&rec->opts)) {
4226 err = -EINVAL;
4227 goto out;
4228 }
4229
4230 err = record__config_tracking_events(rec);
4231 if (err) {
4232 pr_err("record__config_tracking_events failed, error %d\n", err);
4233 goto out;
4234 }
4235
4236 err = record__init_thread_masks(rec);
4237 if (err) {
4238 pr_err("Failed to initialize parallel data streaming masks\n");
4239 goto out;
4240 }
4241
4242 if (rec->opts.nr_cblocks > nr_cblocks_max)
4243 rec->opts.nr_cblocks = nr_cblocks_max;
4244 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4245
4246 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4247 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4248
4249 if (rec->opts.comp_level > comp_level_max)
4250 rec->opts.comp_level = comp_level_max;
4251 pr_debug("comp level: %d\n", rec->opts.comp_level);
4252
4253 err = __cmd_record(&record, argc, argv);
4254out:
4255 evlist__delete(rec->evlist);
4256 symbol__exit();
4257 auxtrace_record__free(rec->itr);
4258out_opts:
4259 record__free_thread_masks(rec, rec->nr_threads);
4260 rec->nr_threads = 0;
4261 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4262 return err;
4263}
4264
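/*
 * Handler for the snapshot signal: kick an AUX area tracing snapshot and,
 * when --switch-output is signal driven, request a data file switch.
 */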
4265static void snapshot_sig_handler(int sig __maybe_unused)
4266{
4267 struct record *rec = &record;
4268
4269 hit_auxtrace_snapshot_trigger(rec);
4270
4271 if (switch_output_signal(rec))
4272 trigger_hit(&switch_output_trigger);
4273}
4274
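/*
 * SIGALRM handler armed via alarm() in cmd_record() for time based
 * --switch-output: request a data file switch when the interval expires.
 */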
4275static void alarm_sig_handler(int sig __maybe_unused)
4276{
4277 struct record *rec = &record;
4278
4279 if (switch_output_time(rec))
4280 trigger_hit(&switch_output_trigger);
4281}
4282
