1// SPDX-License-Identifier: GPL-2.0
2/*
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
8 */
9#include "builtin.h"
10
11#include "util/build-id.h"
12#include <subcmd/parse-options.h>
13#include <internal/xyarray.h>
14#include "util/parse-events.h"
15#include "util/config.h"
16
17#include "util/callchain.h"
18#include "util/cgroup.h"
19#include "util/header.h"
20#include "util/event.h"
21#include "util/evlist.h"
22#include "util/evsel.h"
23#include "util/debug.h"
24#include "util/mmap.h"
25#include "util/mutex.h"
26#include "util/target.h"
27#include "util/session.h"
28#include "util/tool.h"
29#include "util/symbol.h"
30#include "util/record.h"
31#include "util/cpumap.h"
32#include "util/thread_map.h"
33#include "util/data.h"
34#include "util/perf_regs.h"
35#include "util/auxtrace.h"
36#include "util/tsc.h"
37#include "util/parse-branch-options.h"
38#include "util/parse-regs-options.h"
39#include "util/perf_api_probe.h"
40#include "util/trigger.h"
41#include "util/perf-hooks.h"
42#include "util/cpu-set-sched.h"
43#include "util/synthetic-events.h"
44#include "util/time-utils.h"
45#include "util/units.h"
46#include "util/bpf-event.h"
47#include "util/util.h"
48#include "util/pfm.h"
49#include "util/pmu.h"
50#include "util/pmus.h"
51#include "util/clockid.h"
52#include "util/off_cpu.h"
53#include "util/bpf-filter.h"
54#include "asm/bug.h"
55#include "perf.h"
56#include "cputopo.h"
57
58#include <errno.h>
59#include <inttypes.h>
60#include <locale.h>
61#include <poll.h>
62#include <pthread.h>
63#include <unistd.h>
64#ifndef HAVE_GETTID
65#include <syscall.h>
66#endif
67#include <sched.h>
68#include <signal.h>
69#ifdef HAVE_EVENTFD_SUPPORT
70#include <sys/eventfd.h>
71#endif
72#include <sys/mman.h>
73#include <sys/wait.h>
74#include <sys/types.h>
75#include <sys/stat.h>
76#include <fcntl.h>
77#include <linux/err.h>
78#include <linux/string.h>
79#include <linux/time64.h>
80#include <linux/zalloc.h>
81#include <linux/bitmap.h>
82#include <sys/time.h>
83
84struct switch_output {
85 bool enabled;
86 bool signal;
87 unsigned long size;
88 unsigned long time;
89 const char *str;
90 bool set;
91 char **filenames;
92 int num_files;
93 int cur_file;
94};
95
96struct thread_mask {
97 struct mmap_cpu_mask maps;
98 struct mmap_cpu_mask affinity;
99};
100
101struct record_thread {
102 pid_t tid;
103 struct thread_mask *mask;
104 struct {
105 int msg[2];
106 int ack[2];
107 } pipes;
108 struct fdarray pollfd;
109 int ctlfd_pos;
110 int nr_mmaps;
111 struct mmap **maps;
112 struct mmap **overwrite_maps;
113 struct record *rec;
114 unsigned long long samples;
115 unsigned long waking;
116 u64 bytes_written;
117 u64 bytes_transferred;
118 u64 bytes_compressed;
119};
120
121static __thread struct record_thread *thread;
122
123enum thread_msg {
124 THREAD_MSG__UNDEFINED = 0,
125 THREAD_MSG__READY,
126 THREAD_MSG__MAX,
127};
128
129static const char *thread_msg_tags[THREAD_MSG__MAX] = {
130 "UNDEFINED", "READY"
131};
132
133enum thread_spec {
134 THREAD_SPEC__UNDEFINED = 0,
135 THREAD_SPEC__CPU,
136 THREAD_SPEC__CORE,
137 THREAD_SPEC__PACKAGE,
138 THREAD_SPEC__NUMA,
139 THREAD_SPEC__USER,
140 THREAD_SPEC__MAX,
141};
142
143static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
144 "undefined", "cpu", "core", "package", "numa", "user"
145};
146
147struct pollfd_index_map {
148 int evlist_pollfd_index;
149 int thread_pollfd_index;
150};
151
152struct record {
153 struct perf_tool tool;
154 struct record_opts opts;
155 u64 bytes_written;
156 u64 thread_bytes_written;
157 struct perf_data data;
158 struct auxtrace_record *itr;
159 struct evlist *evlist;
160 struct perf_session *session;
161 struct evlist *sb_evlist;
162 pthread_t thread_id;
163 int realtime_prio;
164 bool switch_output_event_set;
165 bool no_buildid;
166 bool no_buildid_set;
167 bool no_buildid_cache;
168 bool no_buildid_cache_set;
169 bool buildid_all;
170 bool buildid_mmap;
171 bool timestamp_filename;
172 bool timestamp_boundary;
173 bool off_cpu;
174 struct switch_output switch_output;
175 unsigned long long samples;
176 unsigned long output_max_size; /* = 0: unlimited */
177 struct perf_debuginfod debuginfod;
178 int nr_threads;
179 struct thread_mask *thread_masks;
180 struct record_thread *thread_data;
181 struct pollfd_index_map *index_map;
182 size_t index_map_sz;
183 size_t index_map_cnt;
184};
185
186static volatile int done;
187
188static volatile int auxtrace_record__snapshot_started;
189static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
190static DEFINE_TRIGGER(switch_output_trigger);
191
192static const char *affinity_tags[PERF_AFFINITY_MAX] = {
193 "SYS", "NODE", "CPU"
194};
195
196#ifndef HAVE_GETTID
197static inline pid_t gettid(void)
198{
199 return (pid_t)syscall(__NR_gettid);
200}
201#endif
202
203static int record__threads_enabled(struct record *rec)
204{
205 return rec->opts.threads_spec;
206}
207
208static bool switch_output_signal(struct record *rec)
209{
210 return rec->switch_output.signal &&
211 trigger_is_ready(t: &switch_output_trigger);
212}
213
214static bool switch_output_size(struct record *rec)
215{
216 return rec->switch_output.size &&
217 trigger_is_ready(t: &switch_output_trigger) &&
218 (rec->bytes_written >= rec->switch_output.size);
219}
220
221static bool switch_output_time(struct record *rec)
222{
223 return rec->switch_output.time &&
224 trigger_is_ready(t: &switch_output_trigger);
225}
226
227static u64 record__bytes_written(struct record *rec)
228{
229 return rec->bytes_written + rec->thread_bytes_written;
230}
231
232static bool record__output_max_size_exceeded(struct record *rec)
233{
234 return rec->output_max_size &&
235 (record__bytes_written(rec) >= rec->output_max_size);
236}
237
238static int record__write(struct record *rec, struct mmap *map __maybe_unused,
239 void *bf, size_t size)
240{
241 struct perf_data_file *file = &rec->session->data->file;
242
243 if (map && map->file)
244 file = map->file;
245
246 if (perf_data_file__write(file, buf: bf, size) < 0) {
247 pr_err("failed to write perf data, error: %m\n");
248 return -1;
249 }
250
251 if (map && map->file) {
252 thread->bytes_written += size;
253 rec->thread_bytes_written += size;
254 } else {
255 rec->bytes_written += size;
256 }
257
258 if (record__output_max_size_exceeded(rec) && !done) {
259 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
260 " stopping session ]\n",
261 record__bytes_written(rec) >> 10);
262 done = 1;
263 }
264
265 if (switch_output_size(rec))
266 trigger_hit(t: &switch_output_trigger);
267
268 return 0;
269}
270
271static int record__aio_enabled(struct record *rec);
272static int record__comp_enabled(struct record *rec);
273static size_t zstd_compress(struct perf_session *session, struct mmap *map,
274 void *dst, size_t dst_size, void *src, size_t src_size);
275
276#ifdef HAVE_AIO_SUPPORT
277static int record__aio_write(struct aiocb *cblock, int trace_fd,
278 void *buf, size_t size, off_t off)
279{
280 int rc;
281
282 cblock->aio_fildes = trace_fd;
283 cblock->aio_buf = buf;
284 cblock->aio_nbytes = size;
285 cblock->aio_offset = off;
286 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
287
288 do {
289 rc = aio_write(cblock);
290 if (rc == 0) {
291 break;
292 } else if (errno != EAGAIN) {
293 cblock->aio_fildes = -1;
294 pr_err("failed to queue perf data, error: %m\n");
295 break;
296 }
297 } while (1);
298
299 return rc;
300}
301
302static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
303{
304 void *rem_buf;
305 off_t rem_off;
306 size_t rem_size;
307 int rc, aio_errno;
308 ssize_t aio_ret, written;
309
310 aio_errno = aio_error(cblock);
311 if (aio_errno == EINPROGRESS)
312 return 0;
313
314 written = aio_ret = aio_return(cblock);
315 if (aio_ret < 0) {
316 if (aio_errno != EINTR)
317 pr_err("failed to write perf data, error: %m\n");
318 written = 0;
319 }
320
321 rem_size = cblock->aio_nbytes - written;
322
323 if (rem_size == 0) {
324 cblock->aio_fildes = -1;
325 /*
326 * md->refcount is incremented in record__aio_pushfn() for
327 * every aio write request started in record__aio_push() so
328 * decrement it because the request is now complete.
329 */
330 perf_mmap__put(&md->core);
331 rc = 1;
332 } else {
333 /*
334 * aio write request may require restart with the
335 * reminder if the kernel didn't write whole
336 * chunk at once.
337 */
338 rem_off = cblock->aio_offset + written;
339 rem_buf = (void *)(cblock->aio_buf + written);
340 record__aio_write(cblock, cblock->aio_fildes,
341 rem_buf, rem_size, rem_off);
342 rc = 0;
343 }
344
345 return rc;
346}
347
348static int record__aio_sync(struct mmap *md, bool sync_all)
349{
350 struct aiocb **aiocb = md->aio.aiocb;
351 struct aiocb *cblocks = md->aio.cblocks;
352 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
353 int i, do_suspend;
354
355 do {
356 do_suspend = 0;
357 for (i = 0; i < md->aio.nr_cblocks; ++i) {
358 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
359 if (sync_all)
360 aiocb[i] = NULL;
361 else
362 return i;
363 } else {
364 /*
365 * Started aio write is not complete yet
366 * so it has to be waited before the
367 * next allocation.
368 */
369 aiocb[i] = &cblocks[i];
370 do_suspend = 1;
371 }
372 }
373 if (!do_suspend)
374 return -1;
375
376 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
377 if (!(errno == EAGAIN || errno == EINTR))
378 pr_err("failed to sync perf data, error: %m\n");
379 }
380 } while (1);
381}
382
383struct record_aio {
384 struct record *rec;
385 void *data;
386 size_t size;
387};
388
389static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
390{
391 struct record_aio *aio = to;
392
393 /*
394 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
395 * to release space in the kernel buffer as fast as possible, calling
396 * perf_mmap__consume() from perf_mmap__push() function.
397 *
398 * That lets the kernel to proceed with storing more profiling data into
399 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
400 *
401 * Coping can be done in two steps in case the chunk of profiling data
402 * crosses the upper bound of the kernel buffer. In this case we first move
403 * part of data from map->start till the upper bound and then the reminder
404 * from the beginning of the kernel buffer till the end of the data chunk.
405 */
406
407 if (record__comp_enabled(aio->rec)) {
408 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
409 mmap__mmap_len(map) - aio->size,
410 buf, size);
411 } else {
412 memcpy(aio->data + aio->size, buf, size);
413 }
414
415 if (!aio->size) {
416 /*
417 * Increment map->refcount to guard map->aio.data[] buffer
418 * from premature deallocation because map object can be
419 * released earlier than aio write request started on
420 * map->aio.data[] buffer is complete.
421 *
422 * perf_mmap__put() is done at record__aio_complete()
423 * after started aio request completion or at record__aio_push()
424 * if the request failed to start.
425 */
426 perf_mmap__get(&map->core);
427 }
428
429 aio->size += size;
430
431 return size;
432}
433
434static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
435{
436 int ret, idx;
437 int trace_fd = rec->session->data->file.fd;
438 struct record_aio aio = { .rec = rec, .size = 0 };
439
440 /*
441 * Call record__aio_sync() to wait till map->aio.data[] buffer
442 * becomes available after previous aio write operation.
443 */
444
445 idx = record__aio_sync(map, false);
446 aio.data = map->aio.data[idx];
447 ret = perf_mmap__push(map, &aio, record__aio_pushfn);
448 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
449 return ret;
450
451 rec->samples++;
452 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
453 if (!ret) {
454 *off += aio.size;
455 rec->bytes_written += aio.size;
456 if (switch_output_size(rec))
457 trigger_hit(&switch_output_trigger);
458 } else {
459 /*
460 * Decrement map->refcount incremented in record__aio_pushfn()
461 * back if record__aio_write() operation failed to start, otherwise
462 * map->refcount is decremented in record__aio_complete() after
463 * aio write operation finishes successfully.
464 */
465 perf_mmap__put(&map->core);
466 }
467
468 return ret;
469}
470
471static off_t record__aio_get_pos(int trace_fd)
472{
473 return lseek(trace_fd, 0, SEEK_CUR);
474}
475
476static void record__aio_set_pos(int trace_fd, off_t pos)
477{
478 lseek(trace_fd, pos, SEEK_SET);
479}
480
481static void record__aio_mmap_read_sync(struct record *rec)
482{
483 int i;
484 struct evlist *evlist = rec->evlist;
485 struct mmap *maps = evlist->mmap;
486
487 if (!record__aio_enabled(rec))
488 return;
489
490 for (i = 0; i < evlist->core.nr_mmaps; i++) {
491 struct mmap *map = &maps[i];
492
493 if (map->core.base)
494 record__aio_sync(map, true);
495 }
496}
497
498static int nr_cblocks_default = 1;
499static int nr_cblocks_max = 4;
500
501static int record__aio_parse(const struct option *opt,
502 const char *str,
503 int unset)
504{
505 struct record_opts *opts = (struct record_opts *)opt->value;
506
507 if (unset) {
508 opts->nr_cblocks = 0;
509 } else {
510 if (str)
511 opts->nr_cblocks = strtol(str, NULL, 0);
512 if (!opts->nr_cblocks)
513 opts->nr_cblocks = nr_cblocks_default;
514 }
515
516 return 0;
517}
518#else /* HAVE_AIO_SUPPORT */
519static int nr_cblocks_max = 0;
520
521static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
522 off_t *off __maybe_unused)
523{
524 return -1;
525}
526
527static off_t record__aio_get_pos(int trace_fd __maybe_unused)
528{
529 return -1;
530}
531
532static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
533{
534}
535
536static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
537{
538}
539#endif
540
541static int record__aio_enabled(struct record *rec)
542{
543 return rec->opts.nr_cblocks > 0;
544}
545
546#define MMAP_FLUSH_DEFAULT 1
547static int record__mmap_flush_parse(const struct option *opt,
548 const char *str,
549 int unset)
550{
551 int flush_max;
552 struct record_opts *opts = (struct record_opts *)opt->value;
553 static struct parse_tag tags[] = {
554 { .tag = 'B', .mult = 1 },
555 { .tag = 'K', .mult = 1 << 10 },
556 { .tag = 'M', .mult = 1 << 20 },
557 { .tag = 'G', .mult = 1 << 30 },
558 { .tag = 0 },
559 };
560
561 if (unset)
562 return 0;
563
564 if (str) {
565 opts->mmap_flush = parse_tag_value(str, tags);
566 if (opts->mmap_flush == (int)-1)
567 opts->mmap_flush = strtol(str, NULL, 0);
568 }
569
570 if (!opts->mmap_flush)
571 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
572
573 flush_max = evlist__mmap_size(pages: opts->mmap_pages);
574 flush_max /= 4;
575 if (opts->mmap_flush > flush_max)
576 opts->mmap_flush = flush_max;
577
578 return 0;
579}
580
581#ifdef HAVE_ZSTD_SUPPORT
582static unsigned int comp_level_default = 1;
583
584static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
585{
586 struct record_opts *opts = opt->value;
587
588 if (unset) {
589 opts->comp_level = 0;
590 } else {
591 if (str)
592 opts->comp_level = strtol(str, NULL, 0);
593 if (!opts->comp_level)
594 opts->comp_level = comp_level_default;
595 }
596
597 return 0;
598}
599#endif
600static unsigned int comp_level_max = 22;
601
602static int record__comp_enabled(struct record *rec)
603{
604 return rec->opts.comp_level > 0;
605}
606
607static int process_synthesized_event(struct perf_tool *tool,
608 union perf_event *event,
609 struct perf_sample *sample __maybe_unused,
610 struct machine *machine __maybe_unused)
611{
612 struct record *rec = container_of(tool, struct record, tool);
613 return record__write(rec, NULL, bf: event, size: event->header.size);
614}
615
616static struct mutex synth_lock;
617
618static int process_locked_synthesized_event(struct perf_tool *tool,
619 union perf_event *event,
620 struct perf_sample *sample __maybe_unused,
621 struct machine *machine __maybe_unused)
622{
623 int ret;
624
625 mutex_lock(&synth_lock);
626 ret = process_synthesized_event(tool, event, sample, machine);
627 mutex_unlock(lock: &synth_lock);
628 return ret;
629}
630
631static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
632{
633 struct record *rec = to;
634
635 if (record__comp_enabled(rec)) {
636 size = zstd_compress(session: rec->session, map, dst: map->data, dst_size: mmap__mmap_len(map), src: bf, src_size: size);
637 bf = map->data;
638 }
639
640 thread->samples++;
641 return record__write(rec, map, bf, size);
642}
643
644static volatile sig_atomic_t signr = -1;
645static volatile sig_atomic_t child_finished;
646#ifdef HAVE_EVENTFD_SUPPORT
647static volatile sig_atomic_t done_fd = -1;
648#endif
649
650static void sig_handler(int sig)
651{
652 if (sig == SIGCHLD)
653 child_finished = 1;
654 else
655 signr = sig;
656
657 done = 1;
658#ifdef HAVE_EVENTFD_SUPPORT
659 if (done_fd >= 0) {
660 u64 tmp = 1;
661 int orig_errno = errno;
662
663 /*
664 * It is possible for this signal handler to run after done is
665 * checked in the main loop, but before the perf counter fds are
666 * polled. If this happens, the poll() will continue to wait
667 * even though done is set, and will only break out if either
668 * another signal is received, or the counters are ready for
669 * read. To ensure the poll() doesn't sleep when done is set,
670 * use an eventfd (done_fd) to wake up the poll().
671 */
672 if (write(done_fd, &tmp, sizeof(tmp)) < 0)
673 pr_err("failed to signal wakeup fd, error: %m\n");
674
675 errno = orig_errno;
676 }
677#endif // HAVE_EVENTFD_SUPPORT
678}
679
680static void sigsegv_handler(int sig)
681{
682 perf_hooks__recover();
683 sighandler_dump_stack(sig);
684}
685
686static void record__sig_exit(void)
687{
688 if (signr == -1)
689 return;
690
691 signal(signr, SIG_DFL);
692 raise(signr);
693}
694
695#ifdef HAVE_AUXTRACE_SUPPORT
696
697static int record__process_auxtrace(struct perf_tool *tool,
698 struct mmap *map,
699 union perf_event *event, void *data1,
700 size_t len1, void *data2, size_t len2)
701{
702 struct record *rec = container_of(tool, struct record, tool);
703 struct perf_data *data = &rec->data;
704 size_t padding;
705 u8 pad[8] = {0};
706
707 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
708 off_t file_offset;
709 int fd = perf_data__fd(data);
710 int err;
711
712 file_offset = lseek(fd, 0, SEEK_CUR);
713 if (file_offset == -1)
714 return -1;
715 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
716 event, file_offset);
717 if (err)
718 return err;
719 }
720
721 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
722 padding = (len1 + len2) & 7;
723 if (padding)
724 padding = 8 - padding;
725
726 record__write(rec, map, event, event->header.size);
727 record__write(rec, map, data1, len1);
728 if (len2)
729 record__write(rec, map, data2, len2);
730 record__write(rec, map, &pad, padding);
731
732 return 0;
733}
734
735static int record__auxtrace_mmap_read(struct record *rec,
736 struct mmap *map)
737{
738 int ret;
739
740 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
741 record__process_auxtrace);
742 if (ret < 0)
743 return ret;
744
745 if (ret)
746 rec->samples++;
747
748 return 0;
749}
750
751static int record__auxtrace_mmap_read_snapshot(struct record *rec,
752 struct mmap *map)
753{
754 int ret;
755
756 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
757 record__process_auxtrace,
758 rec->opts.auxtrace_snapshot_size);
759 if (ret < 0)
760 return ret;
761
762 if (ret)
763 rec->samples++;
764
765 return 0;
766}
767
768static int record__auxtrace_read_snapshot_all(struct record *rec)
769{
770 int i;
771 int rc = 0;
772
773 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
774 struct mmap *map = &rec->evlist->mmap[i];
775
776 if (!map->auxtrace_mmap.base)
777 continue;
778
779 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
780 rc = -1;
781 goto out;
782 }
783 }
784out:
785 return rc;
786}
787
788static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
789{
790 pr_debug("Recording AUX area tracing snapshot\n");
791 if (record__auxtrace_read_snapshot_all(rec) < 0) {
792 trigger_error(&auxtrace_snapshot_trigger);
793 } else {
794 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
795 trigger_error(&auxtrace_snapshot_trigger);
796 else
797 trigger_ready(&auxtrace_snapshot_trigger);
798 }
799}
800
801static int record__auxtrace_snapshot_exit(struct record *rec)
802{
803 if (trigger_is_error(&auxtrace_snapshot_trigger))
804 return 0;
805
806 if (!auxtrace_record__snapshot_started &&
807 auxtrace_record__snapshot_start(rec->itr))
808 return -1;
809
810 record__read_auxtrace_snapshot(rec, true);
811 if (trigger_is_error(&auxtrace_snapshot_trigger))
812 return -1;
813
814 return 0;
815}
816
817static int record__auxtrace_init(struct record *rec)
818{
819 int err;
820
821 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
822 && record__threads_enabled(rec)) {
823 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
824 return -EINVAL;
825 }
826
827 if (!rec->itr) {
828 rec->itr = auxtrace_record__init(rec->evlist, &err);
829 if (err)
830 return err;
831 }
832
833 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
834 rec->opts.auxtrace_snapshot_opts);
835 if (err)
836 return err;
837
838 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
839 rec->opts.auxtrace_sample_opts);
840 if (err)
841 return err;
842
843 auxtrace_regroup_aux_output(rec->evlist);
844
845 return auxtrace_parse_filters(rec->evlist);
846}
847
848#else
849
850static inline
851int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
852 struct mmap *map __maybe_unused)
853{
854 return 0;
855}
856
857static inline
858void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
859 bool on_exit __maybe_unused)
860{
861}
862
863static inline
864int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
865{
866 return 0;
867}
868
869static inline
870int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
871{
872 return 0;
873}
874
875static int record__auxtrace_init(struct record *rec __maybe_unused)
876{
877 return 0;
878}
879
880#endif
881
882static int record__config_text_poke(struct evlist *evlist)
883{
884 struct evsel *evsel;
885
886 /* Nothing to do if text poke is already configured */
887 evlist__for_each_entry(evlist, evsel) {
888 if (evsel->core.attr.text_poke)
889 return 0;
890 }
891
892 evsel = evlist__add_dummy_on_all_cpus(evlist);
893 if (!evsel)
894 return -ENOMEM;
895
896 evsel->core.attr.text_poke = 1;
897 evsel->core.attr.ksymbol = 1;
898 evsel->immediate = true;
899 evsel__set_sample_bit(evsel, TIME);
900
901 return 0;
902}
903
904static int record__config_off_cpu(struct record *rec)
905{
906 return off_cpu_prepare(evlist: rec->evlist, target: &rec->opts.target, opts: &rec->opts);
907}
908
909static bool record__tracking_system_wide(struct record *rec)
910{
911 struct evlist *evlist = rec->evlist;
912 struct evsel *evsel;
913
914 /*
915 * If non-dummy evsel exists, system_wide sideband is need to
916 * help parse sample information.
917 * For example, PERF_EVENT_MMAP event to help parse symbol,
918 * and PERF_EVENT_COMM event to help parse task executable name.
919 */
920 evlist__for_each_entry(evlist, evsel) {
921 if (!evsel__is_dummy_event(evsel))
922 return true;
923 }
924
925 return false;
926}
927
928static int record__config_tracking_events(struct record *rec)
929{
930 struct record_opts *opts = &rec->opts;
931 struct evlist *evlist = rec->evlist;
932 bool system_wide = false;
933 struct evsel *evsel;
934
935 /*
936 * For initial_delay, system wide or a hybrid system, we need to add
937 * tracking event so that we can track PERF_RECORD_MMAP to cover the
938 * delay of waiting or event synthesis.
939 */
940 if (opts->target.initial_delay || target__has_cpu(target: &opts->target) ||
941 perf_pmus__num_core_pmus() > 1) {
942
943 /*
944 * User space tasks can migrate between CPUs, so when tracing
945 * selected CPUs, sideband for all CPUs is still needed.
946 */
947 if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
948 system_wide = true;
949
950 evsel = evlist__findnew_tracking_event(evlist, system_wide);
951 if (!evsel)
952 return -ENOMEM;
953
954 /*
955 * Enable the tracking event when the process is forked for
956 * initial_delay, immediately for system wide.
957 */
958 if (opts->target.initial_delay && !evsel->immediate &&
959 !target__has_cpu(target: &opts->target))
960 evsel->core.attr.enable_on_exec = 1;
961 else
962 evsel->immediate = 1;
963 }
964
965 return 0;
966}
967
968static bool record__kcore_readable(struct machine *machine)
969{
970 char kcore[PATH_MAX];
971 int fd;
972
973 scnprintf(buf: kcore, size: sizeof(kcore), fmt: "%s/proc/kcore", machine->root_dir);
974
975 fd = open(kcore, O_RDONLY);
976 if (fd < 0)
977 return false;
978
979 close(fd);
980
981 return true;
982}
983
984static int record__kcore_copy(struct machine *machine, struct perf_data *data)
985{
986 char from_dir[PATH_MAX];
987 char kcore_dir[PATH_MAX];
988 int ret;
989
990 snprintf(buf: from_dir, size: sizeof(from_dir), fmt: "%s/proc", machine->root_dir);
991
992 ret = perf_data__make_kcore_dir(data, buf: kcore_dir, buf_sz: sizeof(kcore_dir));
993 if (ret)
994 return ret;
995
996 return kcore_copy(from_dir, to_dir: kcore_dir);
997}
998
999static void record__thread_data_init_pipes(struct record_thread *thread_data)
1000{
1001 thread_data->pipes.msg[0] = -1;
1002 thread_data->pipes.msg[1] = -1;
1003 thread_data->pipes.ack[0] = -1;
1004 thread_data->pipes.ack[1] = -1;
1005}
1006
1007static int record__thread_data_open_pipes(struct record_thread *thread_data)
1008{
1009 if (pipe(thread_data->pipes.msg))
1010 return -EINVAL;
1011
1012 if (pipe(thread_data->pipes.ack)) {
1013 close(thread_data->pipes.msg[0]);
1014 thread_data->pipes.msg[0] = -1;
1015 close(thread_data->pipes.msg[1]);
1016 thread_data->pipes.msg[1] = -1;
1017 return -EINVAL;
1018 }
1019
1020 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1021 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1022 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1023
1024 return 0;
1025}
1026
1027static void record__thread_data_close_pipes(struct record_thread *thread_data)
1028{
1029 if (thread_data->pipes.msg[0] != -1) {
1030 close(thread_data->pipes.msg[0]);
1031 thread_data->pipes.msg[0] = -1;
1032 }
1033 if (thread_data->pipes.msg[1] != -1) {
1034 close(thread_data->pipes.msg[1]);
1035 thread_data->pipes.msg[1] = -1;
1036 }
1037 if (thread_data->pipes.ack[0] != -1) {
1038 close(thread_data->pipes.ack[0]);
1039 thread_data->pipes.ack[0] = -1;
1040 }
1041 if (thread_data->pipes.ack[1] != -1) {
1042 close(thread_data->pipes.ack[1]);
1043 thread_data->pipes.ack[1] = -1;
1044 }
1045}
1046
1047static bool evlist__per_thread(struct evlist *evlist)
1048{
1049 return cpu_map__is_dummy(cpus: evlist->core.user_requested_cpus);
1050}
1051
1052static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1053{
1054 int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1055 struct mmap *mmap = evlist->mmap;
1056 struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1057 struct perf_cpu_map *cpus = evlist->core.all_cpus;
1058 bool per_thread = evlist__per_thread(evlist);
1059
1060 if (per_thread)
1061 thread_data->nr_mmaps = nr_mmaps;
1062 else
1063 thread_data->nr_mmaps = bitmap_weight(src: thread_data->mask->maps.bits,
1064 nbits: thread_data->mask->maps.nbits);
1065 if (mmap) {
1066 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1067 if (!thread_data->maps)
1068 return -ENOMEM;
1069 }
1070 if (overwrite_mmap) {
1071 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1072 if (!thread_data->overwrite_maps) {
1073 zfree(&thread_data->maps);
1074 return -ENOMEM;
1075 }
1076 }
1077 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1078 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1079
1080 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1081 if (per_thread ||
1082 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1083 if (thread_data->maps) {
1084 thread_data->maps[tm] = &mmap[m];
1085 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1086 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1087 }
1088 if (thread_data->overwrite_maps) {
1089 thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1090 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1091 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1092 }
1093 tm++;
1094 }
1095 }
1096
1097 return 0;
1098}
1099
1100static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1101{
1102 int f, tm, pos;
1103 struct mmap *map, *overwrite_map;
1104
1105 fdarray__init(&thread_data->pollfd, 64);
1106
1107 for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1108 map = thread_data->maps ? thread_data->maps[tm] : NULL;
1109 overwrite_map = thread_data->overwrite_maps ?
1110 thread_data->overwrite_maps[tm] : NULL;
1111
1112 for (f = 0; f < evlist->core.pollfd.nr; f++) {
1113 void *ptr = evlist->core.pollfd.priv[f].ptr;
1114
1115 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1116 pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1117 &evlist->core.pollfd);
1118 if (pos < 0)
1119 return pos;
1120 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1121 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1122 }
1123 }
1124 }
1125
1126 return 0;
1127}
1128
1129static void record__free_thread_data(struct record *rec)
1130{
1131 int t;
1132 struct record_thread *thread_data = rec->thread_data;
1133
1134 if (thread_data == NULL)
1135 return;
1136
1137 for (t = 0; t < rec->nr_threads; t++) {
1138 record__thread_data_close_pipes(thread_data: &thread_data[t]);
1139 zfree(&thread_data[t].maps);
1140 zfree(&thread_data[t].overwrite_maps);
1141 fdarray__exit(&thread_data[t].pollfd);
1142 }
1143
1144 zfree(&rec->thread_data);
1145}
1146
1147static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1148 int evlist_pollfd_index,
1149 int thread_pollfd_index)
1150{
1151 size_t x = rec->index_map_cnt;
1152
1153 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1154 return -ENOMEM;
1155 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1156 rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1157 rec->index_map_cnt += 1;
1158 return 0;
1159}
1160
1161static int record__update_evlist_pollfd_from_thread(struct record *rec,
1162 struct evlist *evlist,
1163 struct record_thread *thread_data)
1164{
1165 struct pollfd *e_entries = evlist->core.pollfd.entries;
1166 struct pollfd *t_entries = thread_data->pollfd.entries;
1167 int err = 0;
1168 size_t i;
1169
1170 for (i = 0; i < rec->index_map_cnt; i++) {
1171 int e_pos = rec->index_map[i].evlist_pollfd_index;
1172 int t_pos = rec->index_map[i].thread_pollfd_index;
1173
1174 if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1175 e_entries[e_pos].events != t_entries[t_pos].events) {
1176 pr_err("Thread and evlist pollfd index mismatch\n");
1177 err = -EINVAL;
1178 continue;
1179 }
1180 e_entries[e_pos].revents = t_entries[t_pos].revents;
1181 }
1182 return err;
1183}
1184
1185static int record__dup_non_perf_events(struct record *rec,
1186 struct evlist *evlist,
1187 struct record_thread *thread_data)
1188{
1189 struct fdarray *fda = &evlist->core.pollfd;
1190 int i, ret;
1191
1192 for (i = 0; i < fda->nr; i++) {
1193 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1194 continue;
1195 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1196 if (ret < 0) {
1197 pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1198 return ret;
1199 }
1200 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1201 thread_data, ret, fda->entries[i].fd);
1202 ret = record__map_thread_evlist_pollfd_indexes(rec, evlist_pollfd_index: i, thread_pollfd_index: ret);
1203 if (ret < 0) {
1204 pr_err("Failed to map thread and evlist pollfd indexes\n");
1205 return ret;
1206 }
1207 }
1208 return 0;
1209}
1210
1211static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1212{
1213 int t, ret;
1214 struct record_thread *thread_data;
1215
1216 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1217 if (!rec->thread_data) {
1218 pr_err("Failed to allocate thread data\n");
1219 return -ENOMEM;
1220 }
1221 thread_data = rec->thread_data;
1222
1223 for (t = 0; t < rec->nr_threads; t++)
1224 record__thread_data_init_pipes(thread_data: &thread_data[t]);
1225
1226 for (t = 0; t < rec->nr_threads; t++) {
1227 thread_data[t].rec = rec;
1228 thread_data[t].mask = &rec->thread_masks[t];
1229 ret = record__thread_data_init_maps(thread_data: &thread_data[t], evlist);
1230 if (ret) {
1231 pr_err("Failed to initialize thread[%d] maps\n", t);
1232 goto out_free;
1233 }
1234 ret = record__thread_data_init_pollfd(thread_data: &thread_data[t], evlist);
1235 if (ret) {
1236 pr_err("Failed to initialize thread[%d] pollfd\n", t);
1237 goto out_free;
1238 }
1239 if (t) {
1240 thread_data[t].tid = -1;
1241 ret = record__thread_data_open_pipes(thread_data: &thread_data[t]);
1242 if (ret) {
1243 pr_err("Failed to open thread[%d] communication pipes\n", t);
1244 goto out_free;
1245 }
1246 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1247 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1248 if (ret < 0) {
1249 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1250 goto out_free;
1251 }
1252 thread_data[t].ctlfd_pos = ret;
1253 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1254 thread_data, thread_data[t].ctlfd_pos,
1255 thread_data[t].pipes.msg[0]);
1256 } else {
1257 thread_data[t].tid = gettid();
1258
1259 ret = record__dup_non_perf_events(rec, evlist, thread_data: &thread_data[t]);
1260 if (ret < 0)
1261 goto out_free;
1262
1263 thread_data[t].ctlfd_pos = -1; /* Not used */
1264 }
1265 }
1266
1267 return 0;
1268
1269out_free:
1270 record__free_thread_data(rec);
1271
1272 return ret;
1273}
1274
1275static int record__mmap_evlist(struct record *rec,
1276 struct evlist *evlist)
1277{
1278 int i, ret;
1279 struct record_opts *opts = &rec->opts;
1280 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1281 opts->auxtrace_sample_mode;
1282 char msg[512];
1283
1284 if (opts->affinity != PERF_AFFINITY_SYS)
1285 cpu__setup_cpunode_map();
1286
1287 if (evlist__mmap_ex(evlist, pages: opts->mmap_pages,
1288 auxtrace_pages: opts->auxtrace_mmap_pages,
1289 auxtrace_overwrite,
1290 nr_cblocks: opts->nr_cblocks, affinity: opts->affinity,
1291 flush: opts->mmap_flush, comp_level: opts->comp_level) < 0) {
1292 if (errno == EPERM) {
1293 pr_err("Permission error mapping pages.\n"
1294 "Consider increasing "
1295 "/proc/sys/kernel/perf_event_mlock_kb,\n"
1296 "or try again with a smaller value of -m/--mmap_pages.\n"
1297 "(current value: %u,%u)\n",
1298 opts->mmap_pages, opts->auxtrace_mmap_pages);
1299 return -errno;
1300 } else {
1301 pr_err("failed to mmap with %d (%s)\n", errno,
1302 str_error_r(errno, msg, sizeof(msg)));
1303 if (errno)
1304 return -errno;
1305 else
1306 return -EINVAL;
1307 }
1308 }
1309
1310 if (evlist__initialize_ctlfd(evlist, ctl_fd: opts->ctl_fd, ctl_fd_ack: opts->ctl_fd_ack))
1311 return -1;
1312
1313 ret = record__alloc_thread_data(rec, evlist);
1314 if (ret)
1315 return ret;
1316
1317 if (record__threads_enabled(rec)) {
1318 ret = perf_data__create_dir(data: &rec->data, nr: evlist->core.nr_mmaps);
1319 if (ret) {
1320 pr_err("Failed to create data directory: %s\n", strerror(-ret));
1321 return ret;
1322 }
1323 for (i = 0; i < evlist->core.nr_mmaps; i++) {
1324 if (evlist->mmap)
1325 evlist->mmap[i].file = &rec->data.dir.files[i];
1326 if (evlist->overwrite_mmap)
1327 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1328 }
1329 }
1330
1331 return 0;
1332}
1333
1334static int record__mmap(struct record *rec)
1335{
1336 return record__mmap_evlist(rec, evlist: rec->evlist);
1337}
1338
1339static int record__open(struct record *rec)
1340{
1341 char msg[BUFSIZ];
1342 struct evsel *pos;
1343 struct evlist *evlist = rec->evlist;
1344 struct perf_session *session = rec->session;
1345 struct record_opts *opts = &rec->opts;
1346 int rc = 0;
1347
1348 evlist__config(evlist, opts, callchain: &callchain_param);
1349
1350 evlist__for_each_entry(evlist, pos) {
1351try_again:
1352 if (evsel__open(evsel: pos, cpus: pos->core.cpus, threads: pos->core.threads) < 0) {
1353 if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1354 if (verbose > 0)
1355 ui__warning(format: "%s\n", msg);
1356 goto try_again;
1357 }
1358 if ((errno == EINVAL || errno == EBADF) &&
1359 pos->core.leader != &pos->core &&
1360 pos->weak_group) {
1361 pos = evlist__reset_weak_group(evlist, evsel: pos, close: true);
1362 goto try_again;
1363 }
1364 rc = -errno;
1365 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1366 ui__error(format: "%s\n", msg);
1367 goto out;
1368 }
1369
1370 pos->supported = true;
1371 }
1372
1373 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1374 pr_warning(
1375"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1376"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1377"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1378"file is not found in the buildid cache or in the vmlinux path.\n\n"
1379"Samples in kernel modules won't be resolved at all.\n\n"
1380"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1381"even with a suitable vmlinux or kallsyms file.\n\n");
1382 }
1383
1384 if (evlist__apply_filters(evlist, err_evsel: &pos)) {
1385 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1386 pos->filter ?: "BPF", evsel__name(pos), errno,
1387 str_error_r(errno, msg, sizeof(msg)));
1388 rc = -1;
1389 goto out;
1390 }
1391
1392 rc = record__mmap(rec);
1393 if (rc)
1394 goto out;
1395
1396 session->evlist = evlist;
1397 perf_session__set_id_hdr_size(session);
1398out:
1399 return rc;
1400}
1401
1402static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1403{
1404 if (rec->evlist->first_sample_time == 0)
1405 rec->evlist->first_sample_time = sample_time;
1406
1407 if (sample_time)
1408 rec->evlist->last_sample_time = sample_time;
1409}
1410
1411static int process_sample_event(struct perf_tool *tool,
1412 union perf_event *event,
1413 struct perf_sample *sample,
1414 struct evsel *evsel,
1415 struct machine *machine)
1416{
1417 struct record *rec = container_of(tool, struct record, tool);
1418
1419 set_timestamp_boundary(rec, sample_time: sample->time);
1420
1421 if (rec->buildid_all)
1422 return 0;
1423
1424 rec->samples++;
1425 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1426}
1427
1428static int process_buildids(struct record *rec)
1429{
1430 struct perf_session *session = rec->session;
1431
1432 if (perf_data__size(data: &rec->data) == 0)
1433 return 0;
1434
1435 /*
1436 * During this process, it'll load kernel map and replace the
1437 * dso->long_name to a real pathname it found. In this case
1438 * we prefer the vmlinux path like
1439 * /lib/modules/3.16.4/build/vmlinux
1440 *
1441 * rather than build-id path (in debug directory).
1442 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1443 */
1444 symbol_conf.ignore_vmlinux_buildid = true;
1445
1446 /*
1447 * If --buildid-all is given, it marks all DSO regardless of hits,
1448 * so no need to process samples. But if timestamp_boundary is enabled,
1449 * it still needs to walk on all samples to get the timestamps of
1450 * first/last samples.
1451 */
1452 if (rec->buildid_all && !rec->timestamp_boundary)
1453 rec->tool.sample = NULL;
1454
1455 return perf_session__process_events(session);
1456}
1457
1458static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1459{
1460 int err;
1461 struct perf_tool *tool = data;
1462 /*
1463 *As for guest kernel when processing subcommand record&report,
1464 *we arrange module mmap prior to guest kernel mmap and trigger
1465 *a preload dso because default guest module symbols are loaded
1466 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
1467 *method is used to avoid symbol missing when the first addr is
1468 *in module instead of in guest kernel.
1469 */
1470 err = perf_event__synthesize_modules(tool, process: process_synthesized_event,
1471 machine);
1472 if (err < 0)
1473 pr_err("Couldn't record guest kernel [%d]'s reference"
1474 " relocation symbol.\n", machine->pid);
1475
1476 /*
1477 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1478 * have no _text sometimes.
1479 */
1480 err = perf_event__synthesize_kernel_mmap(tool, process: process_synthesized_event,
1481 machine);
1482 if (err < 0)
1483 pr_err("Couldn't record guest kernel [%d]'s reference"
1484 " relocation symbol.\n", machine->pid);
1485}
1486
1487static struct perf_event_header finished_round_event = {
1488 .size = sizeof(struct perf_event_header),
1489 .type = PERF_RECORD_FINISHED_ROUND,
1490};
1491
1492static struct perf_event_header finished_init_event = {
1493 .size = sizeof(struct perf_event_header),
1494 .type = PERF_RECORD_FINISHED_INIT,
1495};
1496
1497static void record__adjust_affinity(struct record *rec, struct mmap *map)
1498{
1499 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1500 !bitmap_equal(src1: thread->mask->affinity.bits, src2: map->affinity_mask.bits,
1501 nbits: thread->mask->affinity.nbits)) {
1502 bitmap_zero(dst: thread->mask->affinity.bits, nbits: thread->mask->affinity.nbits);
1503 bitmap_or(dst: thread->mask->affinity.bits, src1: thread->mask->affinity.bits,
1504 src2: map->affinity_mask.bits, nbits: thread->mask->affinity.nbits);
1505 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1506 (cpu_set_t *)thread->mask->affinity.bits);
1507 if (verbose == 2) {
1508 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1509 mmap_cpu_mask__scnprintf(mask: &thread->mask->affinity, tag: "affinity");
1510 }
1511 }
1512}
1513
1514static size_t process_comp_header(void *record, size_t increment)
1515{
1516 struct perf_record_compressed *event = record;
1517 size_t size = sizeof(*event);
1518
1519 if (increment) {
1520 event->header.size += increment;
1521 return increment;
1522 }
1523
1524 event->header.type = PERF_RECORD_COMPRESSED;
1525 event->header.size = size;
1526
1527 return size;
1528}
1529
1530static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1531 void *dst, size_t dst_size, void *src, size_t src_size)
1532{
1533 size_t compressed;
1534 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1535 struct zstd_data *zstd_data = &session->zstd_data;
1536
1537 if (map && map->file)
1538 zstd_data = &map->zstd_data;
1539
1540 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1541 max_record_size, process_comp_header);
1542
1543 if (map && map->file) {
1544 thread->bytes_transferred += src_size;
1545 thread->bytes_compressed += compressed;
1546 } else {
1547 session->bytes_transferred += src_size;
1548 session->bytes_compressed += compressed;
1549 }
1550
1551 return compressed;
1552}
1553
1554static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1555 bool overwrite, bool synch)
1556{
1557 u64 bytes_written = rec->bytes_written;
1558 int i;
1559 int rc = 0;
1560 int nr_mmaps;
1561 struct mmap **maps;
1562 int trace_fd = rec->data.file.fd;
1563 off_t off = 0;
1564
1565 if (!evlist)
1566 return 0;
1567
1568 nr_mmaps = thread->nr_mmaps;
1569 maps = overwrite ? thread->overwrite_maps : thread->maps;
1570
1571 if (!maps)
1572 return 0;
1573
1574 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1575 return 0;
1576
1577 if (record__aio_enabled(rec))
1578 off = record__aio_get_pos(trace_fd);
1579
1580 for (i = 0; i < nr_mmaps; i++) {
1581 u64 flush = 0;
1582 struct mmap *map = maps[i];
1583
1584 if (map->core.base) {
1585 record__adjust_affinity(rec, map);
1586 if (synch) {
1587 flush = map->core.flush;
1588 map->core.flush = 1;
1589 }
1590 if (!record__aio_enabled(rec)) {
1591 if (perf_mmap__push(md: map, to: rec, push: record__pushfn) < 0) {
1592 if (synch)
1593 map->core.flush = flush;
1594 rc = -1;
1595 goto out;
1596 }
1597 } else {
1598 if (record__aio_push(rec, map, off: &off) < 0) {
1599 record__aio_set_pos(trace_fd, pos: off);
1600 if (synch)
1601 map->core.flush = flush;
1602 rc = -1;
1603 goto out;
1604 }
1605 }
1606 if (synch)
1607 map->core.flush = flush;
1608 }
1609
1610 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1611 !rec->opts.auxtrace_sample_mode &&
1612 record__auxtrace_mmap_read(rec, map) != 0) {
1613 rc = -1;
1614 goto out;
1615 }
1616 }
1617
1618 if (record__aio_enabled(rec))
1619 record__aio_set_pos(trace_fd, pos: off);
1620
1621 /*
1622 * Mark the round finished in case we wrote
1623 * at least one event.
1624 *
1625 * No need for round events in directory mode,
1626 * because per-cpu maps and files have data
1627 * sorted by kernel.
1628 */
1629 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1630 rc = record__write(rec, NULL, bf: &finished_round_event, size: sizeof(finished_round_event));
1631
1632 if (overwrite)
1633 evlist__toggle_bkw_mmap(evlist, state: BKW_MMAP_EMPTY);
1634out:
1635 return rc;
1636}
1637
1638static int record__mmap_read_all(struct record *rec, bool synch)
1639{
1640 int err;
1641
1642 err = record__mmap_read_evlist(rec, evlist: rec->evlist, overwrite: false, synch);
1643 if (err)
1644 return err;
1645
1646 return record__mmap_read_evlist(rec, evlist: rec->evlist, overwrite: true, synch);
1647}
1648
1649static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1650 void *arg __maybe_unused)
1651{
1652 struct perf_mmap *map = fda->priv[fd].ptr;
1653
1654 if (map)
1655 perf_mmap__put(map);
1656}
1657
1658static void *record__thread(void *arg)
1659{
1660 enum thread_msg msg = THREAD_MSG__READY;
1661 bool terminate = false;
1662 struct fdarray *pollfd;
1663 int err, ctlfd_pos;
1664
1665 thread = arg;
1666 thread->tid = gettid();
1667
1668 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1669 if (err == -1)
1670 pr_warning("threads[%d]: failed to notify on start: %s\n",
1671 thread->tid, strerror(errno));
1672
1673 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1674
1675 pollfd = &thread->pollfd;
1676 ctlfd_pos = thread->ctlfd_pos;
1677
1678 for (;;) {
1679 unsigned long long hits = thread->samples;
1680
1681 if (record__mmap_read_all(rec: thread->rec, synch: false) < 0 || terminate)
1682 break;
1683
1684 if (hits == thread->samples) {
1685
1686 err = fdarray__poll(pollfd, -1);
1687 /*
1688 * Propagate error, only if there's any. Ignore positive
1689 * number of returned events and interrupt error.
1690 */
1691 if (err > 0 || (err < 0 && errno == EINTR))
1692 err = 0;
1693 thread->waking++;
1694
1695 if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1696 record__thread_munmap_filtered, NULL) == 0)
1697 break;
1698 }
1699
1700 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1701 terminate = true;
1702 close(thread->pipes.msg[0]);
1703 thread->pipes.msg[0] = -1;
1704 pollfd->entries[ctlfd_pos].fd = -1;
1705 pollfd->entries[ctlfd_pos].events = 0;
1706 }
1707
1708 pollfd->entries[ctlfd_pos].revents = 0;
1709 }
1710 record__mmap_read_all(rec: thread->rec, synch: true);
1711
1712 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1713 if (err == -1)
1714 pr_warning("threads[%d]: failed to notify on termination: %s\n",
1715 thread->tid, strerror(errno));
1716
1717 return NULL;
1718}
1719
1720static void record__init_features(struct record *rec)
1721{
1722 struct perf_session *session = rec->session;
1723 int feat;
1724
1725 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1726 perf_header__set_feat(header: &session->header, feat);
1727
1728 if (rec->no_buildid)
1729 perf_header__clear_feat(header: &session->header, feat: HEADER_BUILD_ID);
1730
1731#ifdef HAVE_LIBTRACEEVENT
1732 if (!have_tracepoints(&rec->evlist->core.entries))
1733 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1734#endif
1735
1736 if (!rec->opts.branch_stack)
1737 perf_header__clear_feat(header: &session->header, feat: HEADER_BRANCH_STACK);
1738
1739 if (!rec->opts.full_auxtrace)
1740 perf_header__clear_feat(header: &session->header, feat: HEADER_AUXTRACE);
1741
1742 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1743 perf_header__clear_feat(header: &session->header, feat: HEADER_CLOCKID);
1744
1745 if (!rec->opts.use_clockid)
1746 perf_header__clear_feat(header: &session->header, feat: HEADER_CLOCK_DATA);
1747
1748 if (!record__threads_enabled(rec))
1749 perf_header__clear_feat(header: &session->header, feat: HEADER_DIR_FORMAT);
1750
1751 if (!record__comp_enabled(rec))
1752 perf_header__clear_feat(header: &session->header, feat: HEADER_COMPRESSED);
1753
1754 perf_header__clear_feat(header: &session->header, feat: HEADER_STAT);
1755}
1756
1757static void
1758record__finish_output(struct record *rec)
1759{
1760 int i;
1761 struct perf_data *data = &rec->data;
1762 int fd = perf_data__fd(data);
1763
1764 if (data->is_pipe)
1765 return;
1766
1767 rec->session->header.data_size += rec->bytes_written;
1768 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1769 if (record__threads_enabled(rec)) {
1770 for (i = 0; i < data->dir.nr; i++)
1771 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1772 }
1773
1774 if (!rec->no_buildid) {
1775 process_buildids(rec);
1776
1777 if (rec->buildid_all)
1778 dsos__hit_all(session: rec->session);
1779 }
1780 perf_session__write_header(session: rec->session, evlist: rec->evlist, fd, at_exit: true);
1781
1782 return;
1783}
1784
1785static int record__synthesize_workload(struct record *rec, bool tail)
1786{
1787 int err;
1788 struct perf_thread_map *thread_map;
1789 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1790
1791 if (rec->opts.tail_synthesize != tail)
1792 return 0;
1793
1794 thread_map = thread_map__new_by_tid(tid: rec->evlist->workload.pid);
1795 if (thread_map == NULL)
1796 return -1;
1797
1798 err = perf_event__synthesize_thread_map(tool: &rec->tool, threads: thread_map,
1799 process: process_synthesized_event,
1800 machine: &rec->session->machines.host,
1801 needs_mmap,
1802 mmap_data: rec->opts.sample_address);
1803 perf_thread_map__put(thread_map);
1804 return err;
1805}
1806
1807static int write_finished_init(struct record *rec, bool tail)
1808{
1809 if (rec->opts.tail_synthesize != tail)
1810 return 0;
1811
1812 return record__write(rec, NULL, bf: &finished_init_event, size: sizeof(finished_init_event));
1813}
1814
1815static int record__synthesize(struct record *rec, bool tail);
1816
1817static int
1818record__switch_output(struct record *rec, bool at_exit)
1819{
1820 struct perf_data *data = &rec->data;
1821 int fd, err;
1822 char *new_filename;
1823
1824 /* Same Size: "2015122520103046"*/
1825 char timestamp[] = "InvalidTimestamp";
1826
1827 record__aio_mmap_read_sync(rec);
1828
1829 write_finished_init(rec, tail: true);
1830
1831 record__synthesize(rec, tail: true);
1832 if (target__none(target: &rec->opts.target))
1833 record__synthesize_workload(rec, tail: true);
1834
1835 rec->samples = 0;
1836 record__finish_output(rec);
1837 err = fetch_current_timestamp(buf: timestamp, sz: sizeof(timestamp));
1838 if (err) {
1839 pr_err("Failed to get current timestamp\n");
1840 return -EINVAL;
1841 }
1842
1843 fd = perf_data__switch(data, postfix: timestamp,
1844 pos: rec->session->header.data_offset,
1845 at_exit, new_filepath: &new_filename);
1846 if (fd >= 0 && !at_exit) {
1847 rec->bytes_written = 0;
1848 rec->session->header.data_size = 0;
1849 }
1850
1851 if (!quiet)
1852 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1853 data->path, timestamp);
1854
1855 if (rec->switch_output.num_files) {
1856 int n = rec->switch_output.cur_file + 1;
1857
1858 if (n >= rec->switch_output.num_files)
1859 n = 0;
1860 rec->switch_output.cur_file = n;
1861 if (rec->switch_output.filenames[n]) {
1862 remove(rec->switch_output.filenames[n]);
1863 zfree(&rec->switch_output.filenames[n]);
1864 }
1865 rec->switch_output.filenames[n] = new_filename;
1866 } else {
1867 free(new_filename);
1868 }
1869
1870 /* Output tracking events */
1871 if (!at_exit) {
1872 record__synthesize(rec, tail: false);
1873
1874 /*
1875 * In 'perf record --switch-output' without -a,
1876 * record__synthesize() in record__switch_output() won't
1877 * generate tracking events because there's no thread_map
1878 * in evlist. Which causes newly created perf.data doesn't
1879 * contain map and comm information.
1880 * Create a fake thread_map and directly call
1881 * perf_event__synthesize_thread_map() for those events.
1882 */
1883 if (target__none(target: &rec->opts.target))
1884 record__synthesize_workload(rec, tail: false);
1885 write_finished_init(rec, tail: false);
1886 }
1887 return fd;
1888}
1889
1890static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1891 struct perf_record_lost_samples *lost,
1892 int cpu_idx, int thread_idx, u64 lost_count,
1893 u16 misc_flag)
1894{
1895 struct perf_sample_id *sid;
1896 struct perf_sample sample = {};
1897 int id_hdr_size;
1898
1899 lost->lost = lost_count;
1900 if (evsel->core.ids) {
1901 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1902 sample.id = sid->id;
1903 }
1904
1905 id_hdr_size = perf_event__synthesize_id_sample(array: (void *)(lost + 1),
1906 type: evsel->core.attr.sample_type, sample: &sample);
1907 lost->header.size = sizeof(*lost) + id_hdr_size;
1908 lost->header.misc = misc_flag;
1909 record__write(rec, NULL, bf: lost, size: lost->header.size);
1910}
1911
1912static void record__read_lost_samples(struct record *rec)
1913{
1914 struct perf_session *session = rec->session;
1915 struct perf_record_lost_samples *lost;
1916 struct evsel *evsel;
1917
1918 /* there was an error during record__open */
1919 if (session->evlist == NULL)
1920 return;
1921
1922 lost = zalloc(PERF_SAMPLE_MAX_SIZE);
1923 if (lost == NULL) {
1924 pr_debug("Memory allocation failed\n");
1925 return;
1926 }
1927
1928 lost->header.type = PERF_RECORD_LOST_SAMPLES;
1929
1930 evlist__for_each_entry(session->evlist, evsel) {
1931 struct xyarray *xy = evsel->core.sample_id;
1932 u64 lost_count;
1933
1934 if (xy == NULL || evsel->core.fd == NULL)
1935 continue;
1936 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1937 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1938 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1939 continue;
1940 }
1941
1942 for (int x = 0; x < xyarray__max_x(xy); x++) {
1943 for (int y = 0; y < xyarray__max_y(xy); y++) {
1944 struct perf_counts_values count;
1945
1946 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1947 pr_debug("read LOST count failed\n");
1948 goto out;
1949 }
1950
1951 if (count.lost) {
1952 __record__save_lost_samples(rec, evsel, lost,
1953 cpu_idx: x, thread_idx: y, lost_count: count.lost, misc_flag: 0);
1954 }
1955 }
1956 }
1957
1958 lost_count = perf_bpf_filter__lost_count(evsel);
1959 if (lost_count)
1960 __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
1961 PERF_RECORD_MISC_LOST_SAMPLES_BPF);
1962 }
1963out:
1964 free(lost);
1965}
1966
1967static volatile sig_atomic_t workload_exec_errno;
1968
1969/*
1970 * evlist__prepare_workload will send a SIGUSR1
1971 * if the fork fails, since we asked by setting its
1972 * want_signal to true.
1973 */
1974static void workload_exec_failed_signal(int signo __maybe_unused,
1975 siginfo_t *info,
1976 void *ucontext __maybe_unused)
1977{
1978 workload_exec_errno = info->si_value.sival_int;
1979 done = 1;
1980 child_finished = 1;
1981}
1982
1983static void snapshot_sig_handler(int sig);
1984static void alarm_sig_handler(int sig);
1985
1986static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1987{
1988 if (evlist) {
1989 if (evlist->mmap && evlist->mmap[0].core.base)
1990 return evlist->mmap[0].core.base;
1991 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1992 return evlist->overwrite_mmap[0].core.base;
1993 }
1994 return NULL;
1995}
1996
1997static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1998{
1999 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2000 if (pc)
2001 return pc;
2002 return NULL;
2003}
2004
2005static int record__synthesize(struct record *rec, bool tail)
2006{
2007 struct perf_session *session = rec->session;
2008 struct machine *machine = &session->machines.host;
2009 struct perf_data *data = &rec->data;
2010 struct record_opts *opts = &rec->opts;
2011 struct perf_tool *tool = &rec->tool;
2012 int err = 0;
2013 event_op f = process_synthesized_event;
2014
2015 if (rec->opts.tail_synthesize != tail)
2016 return 0;
2017
2018 if (data->is_pipe) {
2019 err = perf_event__synthesize_for_pipe(tool, session, data,
2020 process_synthesized_event);
2021 if (err < 0)
2022 goto out;
2023
2024 rec->bytes_written += err;
2025 }
2026
2027 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2028 process_synthesized_event, machine);
2029 if (err)
2030 goto out;
2031
2032 /* Synthesize id_index before auxtrace_info */
2033 err = perf_event__synthesize_id_index(tool,
2034 process_synthesized_event,
2035 session->evlist, machine);
2036 if (err)
2037 goto out;
2038
2039 if (rec->opts.full_auxtrace) {
2040 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2041 session, process_synthesized_event);
2042 if (err)
2043 goto out;
2044 }
2045
2046 if (!evlist__exclude_kernel(rec->evlist)) {
2047 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2048 machine);
2049 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2050 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2051 "Check /proc/kallsyms permission or run as root.\n");
2052
2053 err = perf_event__synthesize_modules(tool, process_synthesized_event,
2054 machine);
2055 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2056 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2057 "Check /proc/modules permission or run as root.\n");
2058 }
2059
2060 if (perf_guest) {
2061 machines__process_guests(&session->machines,
2062 perf_event__synthesize_guest_os, tool);
2063 }
2064
2065 err = perf_event__synthesize_extra_attr(&rec->tool,
2066 rec->evlist,
2067 process_synthesized_event,
2068 data->is_pipe);
2069 if (err)
2070 goto out;
2071
2072 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2073 process_synthesized_event,
2074 NULL);
2075 if (err < 0) {
2076 pr_err("Couldn't synthesize thread map.\n");
2077 return err;
2078 }
2079
2080 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2081 process_synthesized_event, NULL);
2082 if (err < 0) {
2083 pr_err("Couldn't synthesize cpu map.\n");
2084 return err;
2085 }
2086
2087 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2088 machine, opts);
2089 if (err < 0) {
2090 pr_warning("Couldn't synthesize bpf events.\n");
2091 err = 0;
2092 }
2093
2094 if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2095 err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2096 machine);
2097 if (err < 0) {
2098 pr_warning("Couldn't synthesize cgroup events.\n");
2099 err = 0;
2100 }
2101 }
2102
2103 if (rec->opts.nr_threads_synthesize > 1) {
2104 mutex_init(&synth_lock);
2105 perf_set_multithreaded();
2106 f = process_locked_synthesized_event;
2107 }
2108
2109 if (rec->opts.synth & PERF_SYNTH_TASK) {
2110 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2111
2112 err = __machine__synthesize_threads(machine, tool, &opts->target,
2113 rec->evlist->core.threads,
2114 f, needs_mmap, opts->sample_address,
2115 rec->opts.nr_threads_synthesize);
2116 }
2117
2118 if (rec->opts.nr_threads_synthesize > 1) {
2119 perf_set_singlethreaded();
2120 mutex_destroy(&synth_lock);
2121 }
2122
2123out:
2124 return err;
2125}
2126
2127static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2128{
2129 struct record *rec = data;
2130 pthread_kill(rec->thread_id, SIGUSR2);
2131 return 0;
2132}
2133
2134static int record__setup_sb_evlist(struct record *rec)
2135{
2136 struct record_opts *opts = &rec->opts;
2137
2138 if (rec->sb_evlist != NULL) {
2139 /*
2140 * We get here if --switch-output-event populated the
2141 * sb_evlist, so associate a callback that will send a SIGUSR2
2142 * to the main thread.
2143 */
2144 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2145 rec->thread_id = pthread_self();
2146 }
2147#ifdef HAVE_LIBBPF_SUPPORT
2148 if (!opts->no_bpf_event) {
2149 if (rec->sb_evlist == NULL) {
2150 rec->sb_evlist = evlist__new();
2151
2152 if (rec->sb_evlist == NULL) {
2153 pr_err("Couldn't create side band evlist.\n.");
2154 return -1;
2155 }
2156 }
2157
2158 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
2159 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2160 return -1;
2161 }
2162 }
2163#endif
2164 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2165 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2166 opts->no_bpf_event = true;
2167 }
2168
2169 return 0;
2170}
2171
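/*
 * Store the selected clockid and a pair of reference timestamps (TOD and the
 * chosen clock) in the header, so reports can convert sample times to
 * wall-clock time.
 */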
2172static int record__init_clock(struct record *rec)
2173{
2174 struct perf_session *session = rec->session;
2175 struct timespec ref_clockid;
2176 struct timeval ref_tod;
2177 u64 ref;
2178
2179 if (!rec->opts.use_clockid)
2180 return 0;
2181
2182 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2183 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
2184
2185 session->header.env.clock.clockid = rec->opts.clockid;
2186
2187 if (gettimeofday(&ref_tod, NULL) != 0) {
2188 pr_err("gettimeofday failed, cannot set reference time.\n");
2189 return -1;
2190 }
2191
2192 if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2193 pr_err("clock_gettime failed, cannot set reference time.\n");
2194 return -1;
2195 }
2196
2197 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2198 (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2199
2200 session->header.env.clock.tod_ns = ref;
2201
2202 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2203 (u64) ref_clockid.tv_nsec;
2204
2205 session->header.env.clock.clockid_ns = ref;
2206 return 0;
2207}
2208
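/*
 * Kick off an AUX area snapshot if the trigger is armed; the main loop picks
 * up auxtrace_record__snapshot_started and reads the snapshot data.
 */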
2209static void hit_auxtrace_snapshot_trigger(struct record *rec)
2210{
2211 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2212 trigger_hit(&auxtrace_snapshot_trigger);
2213 auxtrace_record__snapshot_started = 1;
2214 if (auxtrace_record__snapshot_start(rec->itr))
2215 trigger_error(&auxtrace_snapshot_trigger);
2216 }
2217}
2218
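/*
 * On systems with more than one core PMU, rewrite plain hybrid event names as
 * "pmu/event/" so the originating PMU is visible in the output.
 */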
2219static void record__uniquify_name(struct record *rec)
2220{
2221 struct evsel *pos;
2222 struct evlist *evlist = rec->evlist;
2223 char *new_name;
2224 int ret;
2225
2226 if (perf_pmus__num_core_pmus() == 1)
2227 return;
2228
2229 evlist__for_each_entry(evlist, pos) {
2230 if (!evsel__is_hybrid(pos))
2231 continue;
2232
2233 if (strchr(pos->name, '/'))
2234 continue;
2235
2236 ret = asprintf(&new_name, "%s/%s/",
2237 pos->pmu_name, pos->name);
2238 if (ret > 0) {
2239 free(pos->name);
2240 pos->name = new_name;
2241 }
2242 }
2243}
2244
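/*
 * Ask a worker thread to terminate by closing the write end of its message
 * pipe, then wait for the final acknowledgement on its ack pipe.
 */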
2245static int record__terminate_thread(struct record_thread *thread_data)
2246{
2247 int err;
2248 enum thread_msg ack = THREAD_MSG__UNDEFINED;
2249 pid_t tid = thread_data->tid;
2250
2251 close(thread_data->pipes.msg[1]);
2252 thread_data->pipes.msg[1] = -1;
2253 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2254 if (err > 0)
2255 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2256 else
2257 pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2258 thread->tid, tid);
2259
2260 return 0;
2261}
2262
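/*
 * Spawn the per-thread trace readers (thread_data[1..]), each pinned to its
 * affinity mask, with signals blocked while they start up; thread_data[0]
 * keeps running in the main thread.
 */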
2263static int record__start_threads(struct record *rec)
2264{
2265 int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2266 struct record_thread *thread_data = rec->thread_data;
2267 sigset_t full, mask;
2268 pthread_t handle;
2269 pthread_attr_t attrs;
2270
2271 thread = &thread_data[0];
2272
2273 if (!record__threads_enabled(rec))
2274 return 0;
2275
2276 sigfillset(&full);
2277 if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2278 pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2279 return -1;
2280 }
2281
2282 pthread_attr_init(&attrs);
2283 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2284
2285 for (t = 1; t < nr_threads; t++) {
2286 enum thread_msg msg = THREAD_MSG__UNDEFINED;
2287
2288#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2289 pthread_attr_setaffinity_np(&attrs,
2290 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2291 (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2292#endif
2293 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2294 for (tt = 1; tt < t; tt++)
2295 record__terminate_thread(&thread_data[tt]);
2296 pr_err("Failed to start threads: %s\n", strerror(errno));
2297 ret = -1;
2298 goto out_err;
2299 }
2300
2301 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2302 if (err > 0)
2303 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2304 thread_msg_tags[msg]);
2305 else
2306 pr_warning("threads[%d]: failed to receive start notification from %d\n",
2307 thread->tid, rec->thread_data[t].tid);
2308 }
2309
2310 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2311 (cpu_set_t *)thread->mask->affinity.bits);
2312
2313 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2314
2315out_err:
2316 pthread_attr_destroy(&attrs);
2317
2318 if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2319 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2320 ret = -1;
2321 }
2322
2323 return ret;
2324}
2325
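/*
 * Terminate all worker threads and fold their per-thread sample and byte
 * counters back into the record and session totals.
 */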
2326static int record__stop_threads(struct record *rec)
2327{
2328 int t;
2329 struct record_thread *thread_data = rec->thread_data;
2330
2331 for (t = 1; t < rec->nr_threads; t++)
2332 record__terminate_thread(&thread_data[t]);
2333
2334 for (t = 0; t < rec->nr_threads; t++) {
2335 rec->samples += thread_data[t].samples;
2336 if (!record__threads_enabled(rec))
2337 continue;
2338 rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2339 rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2340 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2341 thread_data[t].samples, thread_data[t].waking);
2342 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2343 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2344 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2345 else
2346 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2347 }
2348
2349 return 0;
2350}
2351
2352static unsigned long record__waking(struct record *rec)
2353{
2354 int t;
2355 unsigned long waking = 0;
2356 struct record_thread *thread_data = rec->thread_data;
2357
2358 for (t = 0; t < rec->nr_threads; t++)
2359 waking += thread_data[t].waking;
2360
2361 return waking;
2362}
2363
2364static int __cmd_record(struct record *rec, int argc, const char **argv)
2365{
2366 int err;
2367 int status = 0;
2368 const bool forks = argc > 0;
2369 struct perf_tool *tool = &rec->tool;
2370 struct record_opts *opts = &rec->opts;
2371 struct perf_data *data = &rec->data;
2372 struct perf_session *session;
2373 bool disabled = false, draining = false;
2374 int fd;
2375 float ratio = 0;
2376 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2377
2378 atexit(record__sig_exit);
2379 signal(SIGCHLD, sig_handler);
2380 signal(SIGINT, sig_handler);
2381 signal(SIGTERM, sig_handler);
2382 signal(SIGSEGV, sigsegv_handler);
2383
2384 if (rec->opts.record_namespaces)
2385 tool->namespace_events = true;
2386
2387 if (rec->opts.record_cgroup) {
2388#ifdef HAVE_FILE_HANDLE
2389 tool->cgroup_events = true;
2390#else
2391 pr_err("cgroup tracking is not supported\n");
2392 return -1;
2393#endif
2394 }
2395
2396 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2397 signal(SIGUSR2, snapshot_sig_handler);
2398 if (rec->opts.auxtrace_snapshot_mode)
2399 trigger_on(&auxtrace_snapshot_trigger);
2400 if (rec->switch_output.enabled)
2401 trigger_on(&switch_output_trigger);
2402 } else {
2403 signal(SIGUSR2, SIG_IGN);
2404 }
2405
2406 session = perf_session__new(data, tool);
2407 if (IS_ERR(session)) {
2408 pr_err("Perf session creation failed.\n");
2409 return PTR_ERR(session);
2410 }
2411
2412 if (record__threads_enabled(rec)) {
2413 if (perf_data__is_pipe(&rec->data)) {
2414 pr_err("Parallel trace streaming is not available in pipe mode.\n");
2415 return -1;
2416 }
2417 if (rec->opts.full_auxtrace) {
2418 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2419 return -1;
2420 }
2421 }
2422
2423 fd = perf_data__fd(data);
2424 rec->session = session;
2425
2426 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2427 pr_err("Compression initialization failed.\n");
2428 return -1;
2429 }
2430#ifdef HAVE_EVENTFD_SUPPORT
2431 done_fd = eventfd(0, EFD_NONBLOCK);
2432 if (done_fd < 0) {
2433 pr_err("Failed to create wakeup eventfd, error: %m\n");
2434 status = -1;
2435 goto out_delete_session;
2436 }
2437 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2438 if (err < 0) {
2439 pr_err("Failed to add wakeup eventfd to poll list\n");
2440 status = err;
2441 goto out_delete_session;
2442 }
2443#endif // HAVE_EVENTFD_SUPPORT
2444
2445 session->header.env.comp_type = PERF_COMP_ZSTD;
2446 session->header.env.comp_level = rec->opts.comp_level;
2447
2448 if (rec->opts.kcore &&
2449 !record__kcore_readable(&session->machines.host)) {
2450 pr_err("ERROR: kcore is not readable.\n");
2451 return -1;
2452 }
2453
2454 if (record__init_clock(rec))
2455 return -1;
2456
2457 record__init_features(rec);
2458
2459 if (forks) {
2460 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2461 workload_exec_failed_signal);
2462 if (err < 0) {
2463 pr_err("Couldn't run the workload!\n");
2464 status = err;
2465 goto out_delete_session;
2466 }
2467 }
2468
2469 /*
2470 * If we have just a single event and are sending data
2471 * through a pipe, we need to force the id allocation,
2472 * because we synthesize the event name through the pipe
2473 * and need the id for that.
2474 */
2475 if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2476 rec->opts.sample_id = true;
2477
2478 record__uniquify_name(rec);
2479
2480 /* Debug message used by test scripts */
2481 pr_debug3("perf record opening and mmapping events\n");
2482 if (record__open(rec) != 0) {
2483 err = -1;
2484 goto out_free_threads;
2485 }
2486 /* Debug message used by test scripts */
2487 pr_debug3("perf record done opening and mmapping events\n");
2488 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2489
2490 if (rec->opts.kcore) {
2491 err = record__kcore_copy(&session->machines.host, data);
2492 if (err) {
2493 pr_err("ERROR: Failed to copy kcore\n");
2494 goto out_free_threads;
2495 }
2496 }
2497
2498 /*
2499 * Normally perf_session__new would do this, but it doesn't have the
2500 * evlist.
2501 */
2502 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2503 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2504 rec->tool.ordered_events = false;
2505 }
2506
2507 if (evlist__nr_groups(rec->evlist) == 0)
2508 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2509
2510 if (data->is_pipe) {
2511 err = perf_header__write_pipe(fd);
2512 if (err < 0)
2513 goto out_free_threads;
2514 } else {
2515 err = perf_session__write_header(session, rec->evlist, fd, false);
2516 if (err < 0)
2517 goto out_free_threads;
2518 }
2519
2520 err = -1;
2521 if (!rec->no_buildid
2522 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2523 pr_err("Couldn't generate buildids. "
2524 "Use --no-buildid to profile anyway.\n");
2525 goto out_free_threads;
2526 }
2527
2528 err = record__setup_sb_evlist(rec);
2529 if (err)
2530 goto out_free_threads;
2531
2532 err = record__synthesize(rec, false);
2533 if (err < 0)
2534 goto out_free_threads;
2535
2536 if (rec->realtime_prio) {
2537 struct sched_param param;
2538
2539 param.sched_priority = rec->realtime_prio;
2540 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2541 pr_err("Could not set realtime priority.\n");
2542 err = -1;
2543 goto out_free_threads;
2544 }
2545 }
2546
2547 if (record__start_threads(rec))
2548 goto out_free_threads;
2549
2550 /*
2551 * When perf is starting the traced process, all the events
2552 * (apart from group members) have enable_on_exec=1 set,
2553 * so don't spoil it by prematurely enabling them.
2554 */
2555 if (!target__none(&opts->target) && !opts->target.initial_delay)
2556 evlist__enable(rec->evlist);
2557
2558 /*
2559 * Let the child rip
2560 */
2561 if (forks) {
2562 struct machine *machine = &session->machines.host;
2563 union perf_event *event;
2564 pid_t tgid;
2565
2566 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2567 if (event == NULL) {
2568 err = -ENOMEM;
2569 goto out_child;
2570 }
2571
2572 /*
2573 * Some H/W events are generated before the COMM event,
2574 * which is emitted during exec(), so perf script cannot
2575 * see the correct process name for those events.
2576 * Synthesize a COMM event to prevent that.
2577 */
2578 tgid = perf_event__synthesize_comm(tool, event,
2579 rec->evlist->workload.pid,
2580 process_synthesized_event,
2581 machine);
2582 free(event);
2583
2584 if (tgid == -1)
2585 goto out_child;
2586
2587 event = malloc(sizeof(event->namespaces) +
2588 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2589 machine->id_hdr_size);
2590 if (event == NULL) {
2591 err = -ENOMEM;
2592 goto out_child;
2593 }
2594
2595 /*
2596 * Synthesize NAMESPACES event for the command specified.
2597 */
2598 perf_event__synthesize_namespaces(tool, event,
2599 rec->evlist->workload.pid,
2600 tgid, process_synthesized_event,
2601 machine);
2602 free(event);
2603
2604 evlist__start_workload(rec->evlist);
2605 }
2606
2607 if (opts->target.initial_delay) {
2608 pr_info(EVLIST_DISABLED_MSG);
2609 if (opts->target.initial_delay > 0) {
2610 usleep(opts->target.initial_delay * USEC_PER_MSEC);
2611 evlist__enable(rec->evlist);
2612 pr_info(EVLIST_ENABLED_MSG);
2613 }
2614 }
2615
2616 err = event_enable_timer__start(rec->evlist->eet);
2617 if (err)
2618 goto out_child;
2619
2620 /* Debug message used by test scripts */
2621 pr_debug3("perf record has started\n");
2622 fflush(stderr);
2623
2624 trigger_ready(&auxtrace_snapshot_trigger);
2625 trigger_ready(&switch_output_trigger);
2626 perf_hooks__invoke_record_start();
2627
2628 /*
2629 * Must write FINISHED_INIT so it will be seen after all other
2630 * synthesized user events, but before any regular events.
2631 */
2632 err = write_finished_init(rec, false);
2633 if (err < 0)
2634 goto out_child;
2635
2636 for (;;) {
2637 unsigned long long hits = thread->samples;
2638
2639 /*
2640 * It is possible for rec->evlist->bkw_mmap_state to be
2641 * BKW_MMAP_EMPTY here: when done == true and
2642 * hits != rec->samples in the previous round.
2643 *
2644 * evlist__toggle_bkw_mmap ensures we never
2645 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2646 */
2647 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2648 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2649
2650 if (record__mmap_read_all(rec, false) < 0) {
2651 trigger_error(&auxtrace_snapshot_trigger);
2652 trigger_error(&switch_output_trigger);
2653 err = -1;
2654 goto out_child;
2655 }
2656
2657 if (auxtrace_record__snapshot_started) {
2658 auxtrace_record__snapshot_started = 0;
2659 if (!trigger_is_error(&auxtrace_snapshot_trigger))
2660 record__read_auxtrace_snapshot(rec, false);
2661 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2662 pr_err("AUX area tracing snapshot failed\n");
2663 err = -1;
2664 goto out_child;
2665 }
2666 }
2667
2668 if (trigger_is_hit(&switch_output_trigger)) {
2669 /*
2670 * If switch_output_trigger is hit, the data in
2671 * overwritable ring buffer should have been collected,
2672 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2673 *
2674 * If SIGUSR2 is raised after or during record__mmap_read_all(),
2675 * it did not collect data from the overwritable ring
2676 * buffer. Read again.
2677 */
2678 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2679 continue;
2680 trigger_ready(&switch_output_trigger);
2681
2682 /*
2683 * Reenable events in overwrite ring buffer after
2684 * record__mmap_read_all(): we should have collected
2685 * data from it.
2686 */
2687 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2688
2689 if (!quiet)
2690 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2691 record__waking(rec));
2692 thread->waking = 0;
2693 fd = record__switch_output(rec, false);
2694 if (fd < 0) {
2695 pr_err("Failed to switch to new file\n");
2696 trigger_error(&switch_output_trigger);
2697 err = fd;
2698 goto out_child;
2699 }
2700
2701 /* re-arm the alarm */
2702 if (rec->switch_output.time)
2703 alarm(rec->switch_output.time);
2704 }
2705
2706 if (hits == thread->samples) {
2707 if (done || draining)
2708 break;
2709 err = fdarray__poll(&thread->pollfd, -1);
2710 /*
2711 * Propagate error, only if there's any. Ignore positive
2712 * number of returned events and interrupt error.
2713 */
2714 if (err > 0 || (err < 0 && errno == EINTR))
2715 err = 0;
2716 thread->waking++;
2717
2718 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2719 record__thread_munmap_filtered, NULL) == 0)
2720 draining = true;
2721
2722 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2723 if (err)
2724 goto out_child;
2725 }
2726
2727 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2728 switch (cmd) {
2729 case EVLIST_CTL_CMD_SNAPSHOT:
2730 hit_auxtrace_snapshot_trigger(rec);
2731 evlist__ctlfd_ack(rec->evlist);
2732 break;
2733 case EVLIST_CTL_CMD_STOP:
2734 done = 1;
2735 break;
2736 case EVLIST_CTL_CMD_ACK:
2737 case EVLIST_CTL_CMD_UNSUPPORTED:
2738 case EVLIST_CTL_CMD_ENABLE:
2739 case EVLIST_CTL_CMD_DISABLE:
2740 case EVLIST_CTL_CMD_EVLIST:
2741 case EVLIST_CTL_CMD_PING:
2742 default:
2743 break;
2744 }
2745 }
2746
2747 err = event_enable_timer__process(rec->evlist->eet);
2748 if (err < 0)
2749 goto out_child;
2750 if (err) {
2751 err = 0;
2752 done = 1;
2753 }
2754
2755 /*
2756 * When perf is starting the traced process, at the end events
2757 * die with the process and we wait for that. Thus no need to
2758 * disable events in this case.
2759 */
2760 if (done && !disabled && !target__none(&opts->target)) {
2761 trigger_off(&auxtrace_snapshot_trigger);
2762 evlist__disable(rec->evlist);
2763 disabled = true;
2764 }
2765 }
2766
2767 trigger_off(&auxtrace_snapshot_trigger);
2768 trigger_off(&switch_output_trigger);
2769
2770 if (opts->auxtrace_snapshot_on_exit)
2771 record__auxtrace_snapshot_exit(rec);
2772
2773 if (forks && workload_exec_errno) {
2774 char msg[STRERR_BUFSIZE], strevsels[2048];
2775 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2776
2777 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2778
2779 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2780 strevsels, argv[0], emsg);
2781 err = -1;
2782 goto out_child;
2783 }
2784
2785 if (!quiet)
2786 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2787 record__waking(rec));
2788
2789 write_finished_init(rec, true);
2790
2791 if (target__none(&rec->opts.target))
2792 record__synthesize_workload(rec, true);
2793
2794out_child:
2795 record__stop_threads(rec);
2796 record__mmap_read_all(rec, true);
2797out_free_threads:
2798 record__free_thread_data(rec);
2799 evlist__finalize_ctlfd(rec->evlist);
2800 record__aio_mmap_read_sync(rec);
2801
2802 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2803 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2804 session->header.env.comp_ratio = ratio + 0.5;
2805 }
2806
2807 if (forks) {
2808 int exit_status;
2809
2810 if (!child_finished)
2811 kill(rec->evlist->workload.pid, SIGTERM);
2812
2813 wait(&exit_status);
2814
2815 if (err < 0)
2816 status = err;
2817 else if (WIFEXITED(exit_status))
2818 status = WEXITSTATUS(exit_status);
2819 else if (WIFSIGNALED(exit_status))
2820 signr = WTERMSIG(exit_status);
2821 } else
2822 status = err;
2823
2824 if (rec->off_cpu)
2825 rec->bytes_written += off_cpu_write(rec->session);
2826
2827 record__read_lost_samples(rec);
2828 record__synthesize(rec, true);
2829 /* this will be recalculated during process_buildids() */
2830 rec->samples = 0;
2831
2832 if (!err) {
2833 if (!rec->timestamp_filename) {
2834 record__finish_output(rec);
2835 } else {
2836 fd = record__switch_output(rec, true);
2837 if (fd < 0) {
2838 status = fd;
2839 goto out_delete_session;
2840 }
2841 }
2842 }
2843
2844 perf_hooks__invoke_record_end();
2845
2846 if (!err && !quiet) {
2847 char samples[128];
2848 const char *postfix = rec->timestamp_filename ?
2849 ".<timestamp>" : "";
2850
2851 if (rec->samples && !rec->opts.full_auxtrace)
2852 scnprintf(samples, sizeof(samples),
2853 " (%" PRIu64 " samples)", rec->samples);
2854 else
2855 samples[0] = '\0';
2856
2857 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2858 perf_data__size(data) / 1024.0 / 1024.0,
2859 data->path, postfix, samples);
2860 if (ratio) {
2861 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2862 rec->session->bytes_transferred / 1024.0 / 1024.0,
2863 ratio);
2864 }
2865 fprintf(stderr, " ]\n");
2866 }
2867
2868out_delete_session:
2869#ifdef HAVE_EVENTFD_SUPPORT
2870 if (done_fd >= 0) {
2871 fd = done_fd;
2872 done_fd = -1;
2873
2874 close(fd);
2875 }
2876#endif
2877 zstd_fini(&session->zstd_data);
2878 perf_session__delete(session);
2879
2880 if (!opts->no_bpf_event)
2881 evlist__stop_sb_thread(rec->sb_evlist);
2882 return status;
2883}
2884
2885static void callchain_debug(struct callchain_param *callchain)
2886{
2887 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2888
2889 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2890
2891 if (callchain->record_mode == CALLCHAIN_DWARF)
2892 pr_debug("callchain: stack dump size %d\n",
2893 callchain->dump_size);
2894}
2895
2896int record_opts__parse_callchain(struct record_opts *record,
2897 struct callchain_param *callchain,
2898 const char *arg, bool unset)
2899{
2900 int ret;
2901 callchain->enabled = !unset;
2902
2903 /* --no-call-graph */
2904 if (unset) {
2905 callchain->record_mode = CALLCHAIN_NONE;
2906 pr_debug("callchain: disabled\n");
2907 return 0;
2908 }
2909
2910 ret = parse_callchain_record_opt(arg, callchain);
2911 if (!ret) {
2912 /* Enable data address sampling for DWARF unwind. */
2913 if (callchain->record_mode == CALLCHAIN_DWARF)
2914 record->sample_address = true;
2915 callchain_debug(callchain);
2916 }
2917
2918 return ret;
2919}
2920
2921int record_parse_callchain_opt(const struct option *opt,
2922 const char *arg,
2923 int unset)
2924{
2925 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2926}
2927
2928int record_callchain_opt(const struct option *opt,
2929 const char *arg __maybe_unused,
2930 int unset __maybe_unused)
2931{
2932 struct callchain_param *callchain = opt->value;
2933
2934 callchain->enabled = true;
2935
2936 if (callchain->record_mode == CALLCHAIN_NONE)
2937 callchain->record_mode = CALLCHAIN_FP;
2938
2939 callchain_debug(callchain);
2940 return 0;
2941}
2942
2943static int perf_record_config(const char *var, const char *value, void *cb)
2944{
2945 struct record *rec = cb;
2946
2947 if (!strcmp(var, "record.build-id")) {
2948 if (!strcmp(value, "cache"))
2949 rec->no_buildid_cache = false;
2950 else if (!strcmp(value, "no-cache"))
2951 rec->no_buildid_cache = true;
2952 else if (!strcmp(value, "skip"))
2953 rec->no_buildid = true;
2954 else if (!strcmp(value, "mmap"))
2955 rec->buildid_mmap = true;
2956 else
2957 return -1;
2958 return 0;
2959 }
2960 if (!strcmp(var, "record.call-graph")) {
2961 var = "call-graph.record-mode";
2962 return perf_default_config(var, value, cb);
2963 }
2964#ifdef HAVE_AIO_SUPPORT
2965 if (!strcmp(var, "record.aio")) {
2966 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2967 if (!rec->opts.nr_cblocks)
2968 rec->opts.nr_cblocks = nr_cblocks_default;
2969 }
2970#endif
2971 if (!strcmp(var, "record.debuginfod")) {
2972 rec->debuginfod.urls = strdup(value);
2973 if (!rec->debuginfod.urls)
2974 return -ENOMEM;
2975 rec->debuginfod.set = true;
2976 }
2977
2978 return 0;
2979}
2980
2981static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
2982{
2983 struct record *rec = (struct record *)opt->value;
2984
2985 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
2986}
2987
2988static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2989{
2990 struct record_opts *opts = (struct record_opts *)opt->value;
2991
2992 if (unset || !str)
2993 return 0;
2994
2995 if (!strcasecmp(str, "node"))
2996 opts->affinity = PERF_AFFINITY_NODE;
2997 else if (!strcasecmp(str, "cpu"))
2998 opts->affinity = PERF_AFFINITY_CPU;
2999
3000 return 0;
3001}
3002
3003static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3004{
3005 mask->nbits = nr_bits;
3006 mask->bits = bitmap_zalloc(mask->nbits);
3007 if (!mask->bits)
3008 return -ENOMEM;
3009
3010 return 0;
3011}
3012
3013static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3014{
3015 bitmap_free(mask->bits);
3016 mask->nbits = 0;
3017}
3018
3019static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3020{
3021 int ret;
3022
3023 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3024 if (ret) {
3025 mask->affinity.bits = NULL;
3026 return ret;
3027 }
3028
3029 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3030 if (ret) {
3031 record__mmap_cpu_mask_free(&mask->maps);
3032 mask->maps.bits = NULL;
3033 }
3034
3035 return ret;
3036}
3037
3038static void record__thread_mask_free(struct thread_mask *mask)
3039{
3040 record__mmap_cpu_mask_free(&mask->maps);
3041 record__mmap_cpu_mask_free(&mask->affinity);
3042}
3043
3044static int record__parse_threads(const struct option *opt, const char *str, int unset)
3045{
3046 int s;
3047 struct record_opts *opts = opt->value;
3048
3049 if (unset || !str || !strlen(str)) {
3050 opts->threads_spec = THREAD_SPEC__CPU;
3051 } else {
3052 for (s = 1; s < THREAD_SPEC__MAX; s++) {
3053 if (s == THREAD_SPEC__USER) {
3054 opts->threads_user_spec = strdup(str);
3055 if (!opts->threads_user_spec)
3056 return -ENOMEM;
3057 opts->threads_spec = THREAD_SPEC__USER;
3058 break;
3059 }
3060 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3061 opts->threads_spec = s;
3062 break;
3063 }
3064 }
3065 }
3066
3067 if (opts->threads_spec == THREAD_SPEC__USER)
3068 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3069 else
3070 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3071
3072 return 0;
3073}
3074
3075static int parse_output_max_size(const struct option *opt,
3076 const char *str, int unset)
3077{
3078 unsigned long *s = (unsigned long *)opt->value;
3079 static struct parse_tag tags_size[] = {
3080 { .tag = 'B', .mult = 1 },
3081 { .tag = 'K', .mult = 1 << 10 },
3082 { .tag = 'M', .mult = 1 << 20 },
3083 { .tag = 'G', .mult = 1 << 30 },
3084 { .tag = 0 },
3085 };
3086 unsigned long val;
3087
3088 if (unset) {
3089 *s = 0;
3090 return 0;
3091 }
3092
3093 val = parse_tag_value(str, tags_size);
3094 if (val != (unsigned long) -1) {
3095 *s = val;
3096 return 0;
3097 }
3098
3099 return -1;
3100}
3101
3102static int record__parse_mmap_pages(const struct option *opt,
3103 const char *str,
3104 int unset __maybe_unused)
3105{
3106 struct record_opts *opts = opt->value;
3107 char *s, *p;
3108 unsigned int mmap_pages;
3109 int ret;
3110
3111 if (!str)
3112 return -EINVAL;
3113
3114 s = strdup(str);
3115 if (!s)
3116 return -ENOMEM;
3117
3118 p = strchr(s, ',');
3119 if (p)
3120 *p = '\0';
3121
3122 if (*s) {
3123 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3124 if (ret)
3125 goto out_free;
3126 opts->mmap_pages = mmap_pages;
3127 }
3128
3129 if (!p) {
3130 ret = 0;
3131 goto out_free;
3132 }
3133
3134 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3135 if (ret)
3136 goto out_free;
3137
3138 opts->auxtrace_mmap_pages = mmap_pages;
3139
3140out_free:
3141 free(s);
3142 return ret;
3143}
3144
3145void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3146{
3147}
3148
3149static int parse_control_option(const struct option *opt,
3150 const char *str,
3151 int unset __maybe_unused)
3152{
3153 struct record_opts *opts = opt->value;
3154
3155 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3156}
3157
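/*
 * Warn when the --switch-output size threshold is smaller than half the mmap
 * wakeup size: output files may end up noticeably bigger than requested.
 */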
3158static void switch_output_size_warn(struct record *rec)
3159{
3160 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3161 struct switch_output *s = &rec->switch_output;
3162
3163 wakeup_size /= 2;
3164
3165 if (s->size < wakeup_size) {
3166 char buf[100];
3167
3168 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3169 pr_warning("WARNING: switch-output data size lower than "
3170 "wakeup kernel buffer size (%s); "
3171 "expect bigger perf.data sizes\n", buf);
3172 }
3173}
3174
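/*
 * Parse the --switch-output argument: "signal", a size (B/K/M/G) or a time
 * (s/m/h/d) threshold. Not available in parallel streaming mode; enabling it
 * implies timestamped output file names.
 */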
3175static int switch_output_setup(struct record *rec)
3176{
3177 struct switch_output *s = &rec->switch_output;
3178 static struct parse_tag tags_size[] = {
3179 { .tag = 'B', .mult = 1 },
3180 { .tag = 'K', .mult = 1 << 10 },
3181 { .tag = 'M', .mult = 1 << 20 },
3182 { .tag = 'G', .mult = 1 << 30 },
3183 { .tag = 0 },
3184 };
3185 static struct parse_tag tags_time[] = {
3186 { .tag = 's', .mult = 1 },
3187 { .tag = 'm', .mult = 60 },
3188 { .tag = 'h', .mult = 60*60 },
3189 { .tag = 'd', .mult = 60*60*24 },
3190 { .tag = 0 },
3191 };
3192 unsigned long val;
3193
3194 /*
3195 * If we're using --switch-output-event, then we imply
3196 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3197 * thread to its parent.
3198 */
3199 if (rec->switch_output_event_set) {
3200 if (record__threads_enabled(rec)) {
3201 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3202 return 0;
3203 }
3204 goto do_signal;
3205 }
3206
3207 if (!s->set)
3208 return 0;
3209
3210 if (record__threads_enabled(rec)) {
3211 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3212 return 0;
3213 }
3214
3215 if (!strcmp(s->str, "signal")) {
3216do_signal:
3217 s->signal = true;
3218 pr_debug("switch-output with SIGUSR2 signal\n");
3219 goto enabled;
3220 }
3221
3222 val = parse_tag_value(s->str, tags_size);
3223 if (val != (unsigned long) -1) {
3224 s->size = val;
3225 pr_debug("switch-output with %s size threshold\n", s->str);
3226 goto enabled;
3227 }
3228
3229 val = parse_tag_value(s->str, tags_time);
3230 if (val != (unsigned long) -1) {
3231 s->time = val;
3232 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3233 s->str, s->time);
3234 goto enabled;
3235 }
3236
3237 return -1;
3238
3239enabled:
3240 rec->timestamp_filename = true;
3241 s->enabled = true;
3242
3243 if (s->size && !rec->opts.no_buffering)
3244 switch_output_size_warn(rec);
3245
3246 return 0;
3247}
3248
3249static const char * const __record_usage[] = {
3250 "perf record [<options>] [<command>]",
3251 "perf record [<options>] -- <command> [<options>]",
3252 NULL
3253};
3254const char * const *record_usage = __record_usage;
3255
3256static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3257 struct perf_sample *sample, struct machine *machine)
3258{
3259 /*
3260 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3261 * so there is no need to add them twice.
3262 */
3263 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3264 return 0;
3265 return perf_event__process_mmap(tool, event, sample, machine);
3266}
3267
3268static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3269 struct perf_sample *sample, struct machine *machine)
3270{
3271 /*
3272 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3273 * so there is no need to add them twice.
3274 */
3275 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3276 return 0;
3277
3278 return perf_event__process_mmap2(tool, event, sample, machine);
3279}
3280
3281static int process_timestamp_boundary(struct perf_tool *tool,
3282 union perf_event *event __maybe_unused,
3283 struct perf_sample *sample,
3284 struct machine *machine __maybe_unused)
3285{
3286 struct record *rec = container_of(tool, struct record, tool);
3287
3288 set_timestamp_boundary(rec, sample->time);
3289 return 0;
3290}
3291
3292static int parse_record_synth_option(const struct option *opt,
3293 const char *str,
3294 int unset __maybe_unused)
3295{
3296 struct record_opts *opts = opt->value;
3297 char *p = strdup(str);
3298
3299 if (p == NULL)
3300 return -1;
3301
3302 opts->synth = parse_synth_opt(p);
3303 free(p);
3304
3305 if (opts->synth < 0) {
3306 pr_err("Invalid synth option: %s\n", str);
3307 return -1;
3308 }
3309 return 0;
3310}
3311
3312/*
3313 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
3314 * because we need to have access to it in record__exit(), which is called
3315 * after cmd_record() exits, but since record_options needs to be accessible to
3316 * builtin-script, leave it here.
3317 *
3318 * At least we don't touch it in all the other functions here directly.
3319 *
3320 * Just say no to tons of global variables, sigh.
3321 */
3322static struct record record = {
3323 .opts = {
3324 .sample_time = true,
3325 .mmap_pages = UINT_MAX,
3326 .user_freq = UINT_MAX,
3327 .user_interval = ULLONG_MAX,
3328 .freq = 4000,
3329 .target = {
3330 .uses_mmap = true,
3331 .default_per_cpu = true,
3332 },
3333 .mmap_flush = MMAP_FLUSH_DEFAULT,
3334 .nr_threads_synthesize = 1,
3335 .ctl_fd = -1,
3336 .ctl_fd_ack = -1,
3337 .synth = PERF_SYNTH_ALL,
3338 },
3339 .tool = {
3340 .sample = process_sample_event,
3341 .fork = perf_event__process_fork,
3342 .exit = perf_event__process_exit,
3343 .comm = perf_event__process_comm,
3344 .namespaces = perf_event__process_namespaces,
3345 .mmap = build_id__process_mmap,
3346 .mmap2 = build_id__process_mmap2,
3347 .itrace_start = process_timestamp_boundary,
3348 .aux = process_timestamp_boundary,
3349 .ordered_events = true,
3350 },
3351};
3352
3353const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3354 "\n\t\t\t\tDefault: fp";
3355
3356static bool dry_run;
3357
3358static struct parse_events_option_args parse_events_option_args = {
3359 .evlistp = &record.evlist,
3360};
3361
3362static struct parse_events_option_args switch_output_parse_events_option_args = {
3363 .evlistp = &record.sb_evlist,
3364};
3365
3366/*
3367 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3368 * with it and switch to use the library functions in perf_evlist that came
3369 * from builtin-record.c, i.e. use record_opts,
3370 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3371 * using pipes, etc.
3372 */
3373static struct option __record_options[] = {
3374 OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3375 "event selector. use 'perf list' to list available events",
3376 parse_events_option),
3377 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3378 "event filter", parse_filter),
3379 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3380 NULL, "don't record events from perf itself",
3381 exclude_perf),
3382 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3383 "record events on existing process id"),
3384 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3385 "record events on existing thread id"),
3386 OPT_INTEGER('r', "realtime", &record.realtime_prio,
3387 "collect data with this RT SCHED_FIFO priority"),
3388 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3389 "collect data without buffering"),
3390 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3391 "collect raw sample records from all opened counters"),
3392 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3393 "system-wide collection from all CPUs"),
3394 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3395 "list of cpus to monitor"),
3396 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3397 OPT_STRING('o', "output", &record.data.path, "file",
3398 "output file name"),
3399 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3400 &record.opts.no_inherit_set,
3401 "child tasks do not inherit counters"),
3402 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3403 "synthesize non-sample events at the end of output"),
3404 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3405 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3406 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3407 "Fail if the specified frequency can't be used"),
3408 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3409 "profile at this frequency",
3410 record__parse_freq),
3411 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3412 "number of mmap data pages and AUX area tracing mmap pages",
3413 record__parse_mmap_pages),
3414 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3415 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3416 record__mmap_flush_parse),
3417 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3418 NULL, "enables call-graph recording" ,
3419 &record_callchain_opt),
3420 OPT_CALLBACK(0, "call-graph", &record.opts,
3421 "record_mode[,record_size]", record_callchain_help,
3422 &record_parse_callchain_opt),
3423 OPT_INCR('v', "verbose", &verbose,
3424 "be more verbose (show counter open errors, etc)"),
3425 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3426 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3427 "per thread counts"),
3428 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3429 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3430 "Record the sample physical addresses"),
3431 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3432 "Record the sampled data address data page size"),
3433 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3434 "Record the sampled code address (ip) page size"),
3435 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3436 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3437 "Record the sample identifier"),
3438 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3439 &record.opts.sample_time_set,
3440 "Record the sample timestamps"),
3441 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3442 "Record the sample period"),
3443 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3444 "don't sample"),
3445 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3446 &record.no_buildid_cache_set,
3447 "do not update the buildid cache"),
3448 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3449 &record.no_buildid_set,
3450 "do not collect buildids in perf.data"),
3451 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3452 "monitor event in cgroup name only",
3453 parse_cgroups),
3454 OPT_CALLBACK('D', "delay", &record, "ms",
3455 "ms to wait before starting measurement after program start (-1: start with events disabled), "
3456 "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3457 record__parse_event_enable_time),
3458 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3459 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3460 "user to profile"),
3461
3462 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3463 "branch any", "sample any taken branches",
3464 parse_branch_stack),
3465
3466 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3467 "branch filter mask", "branch stack filter modes",
3468 parse_branch_stack),
3469 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3470 "sample by weight (on special events only)"),
3471 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3472 "sample transaction flags (special events only)"),
3473 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3474 "use per-thread mmaps"),
3475 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3476 "sample selected machine registers on interrupt,"
3477 " use '-I?' to list register names", parse_intr_regs),
3478 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3479 "sample selected machine registers on interrupt,"
3480 " use '--user-regs=?' to list register names", parse_user_regs),
3481 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3482 "Record running/enabled time of read (:S) events"),
3483 OPT_CALLBACK('k', "clockid", &record.opts,
3484 "clockid", "clockid to use for events, see clock_gettime()",
3485 parse_clockid),
3486 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3487 "opts", "AUX area tracing Snapshot Mode", ""),
3488 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3489 "opts", "sample AUX area", ""),
3490 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3491 "per thread proc mmap processing timeout in ms"),
3492 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3493 "Record namespaces events"),
3494 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3495 "Record cgroup events"),
3496 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3497 &record.opts.record_switch_events_set,
3498 "Record context switch events"),
3499 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3500 "Configure all used events to run in kernel space.",
3501 PARSE_OPT_EXCLUSIVE),
3502 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3503 "Configure all used events to run in user space.",
3504 PARSE_OPT_EXCLUSIVE),
3505 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3506 "collect kernel callchains"),
3507 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3508 "collect user callchains"),
3509 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3510 "file", "vmlinux pathname"),
3511 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3512 "Record build-id of all DSOs regardless of hits"),
3513 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3514 "Record build-id in map events"),
3515 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3516 "append timestamp to output filename"),
3517 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3518 "Record timestamp boundary (time of first/last samples)"),
3519 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3520 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3521 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3522 "signal"),
3523 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3524 &record.switch_output_event_set, "switch output event",
3525 "switch output event selector. use 'perf list' to list available events",
3526 parse_events_option_new_evlist),
3527 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3528 "Limit number of switch output generated files"),
3529 OPT_BOOLEAN(0, "dry-run", &dry_run,
3530 "Parse options then exit"),
3531#ifdef HAVE_AIO_SUPPORT
3532 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3533 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3534 record__aio_parse),
3535#endif
3536 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3537 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3538 record__parse_affinity),
3539#ifdef HAVE_ZSTD_SUPPORT
3540 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3541 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3542 record__parse_comp_level),
3543#endif
3544 OPT_CALLBACK(0, "max-size", &record.output_max_size,
3545 "size", "Limit the maximum size of the output file", parse_output_max_size),
3546 OPT_UINTEGER(0, "num-thread-synthesize",
3547 &record.opts.nr_threads_synthesize,
3548 "number of threads to run for event synthesis"),
3549#ifdef HAVE_LIBPFM
3550 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3551 "libpfm4 event selector. use 'perf list' to list available events",
3552 parse_libpfm_events_option),
3553#endif
3554 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3555 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3556 "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3557 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3558 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3559 parse_control_option),
3560 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3561 "Fine-tune event synthesis: default=all", parse_record_synth_option),
3562 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3563 &record.debuginfod.set, "debuginfod urls",
3564 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3565 "system"),
3566 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3567 "write collected trace data into several data files using parallel threads",
3568 record__parse_threads),
3569 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3570 OPT_END()
3571};
3572
3573struct option *record_options = __record_options;
3574
3575static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3576{
3577 struct perf_cpu cpu;
3578 int idx;
3579
3580 if (cpu_map__is_dummy(cpus))
3581 return 0;
3582
3583 perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
3584 if (cpu.cpu == -1)
3585 continue;
3586 /* Return -ENODEV if the input cpu is greater than max cpu */
3587 if ((unsigned long)cpu.cpu > mask->nbits)
3588 return -ENODEV;
3589 __set_bit(cpu.cpu, mask->bits);
3590 }
3591
3592 return 0;
3593}
3594
3595static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3596{
3597 struct perf_cpu_map *cpus;
3598
3599 cpus = perf_cpu_map__new(mask_spec);
3600 if (!cpus)
3601 return -ENOMEM;
3602
3603 bitmap_zero(mask->bits, mask->nbits);
3604 if (record__mmap_cpu_mask_init(mask, cpus))
3605 return -ENODEV;
3606
3607 perf_cpu_map__put(cpus);
3608
3609 return 0;
3610}
3611
3612static void record__free_thread_masks(struct record *rec, int nr_threads)
3613{
3614 int t;
3615
3616 if (rec->thread_masks)
3617 for (t = 0; t < nr_threads; t++)
3618 record__thread_mask_free(&rec->thread_masks[t]);
3619
3620 zfree(&rec->thread_masks);
3621}
3622
3623static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3624{
3625 int t, ret;
3626
3627 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3628 if (!rec->thread_masks) {
3629 pr_err("Failed to allocate thread masks\n");
3630 return -ENOMEM;
3631 }
3632
3633 for (t = 0; t < nr_threads; t++) {
3634 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3635 if (ret) {
3636 pr_err("Failed to allocate thread masks[%d]\n", t);
3637 goto out_free;
3638 }
3639 }
3640
3641 return 0;
3642
3643out_free:
3644 record__free_thread_masks(rec, nr_threads);
3645
3646 return ret;
3647}
3648
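/* One reader thread per monitored CPU, pinned to that same CPU. */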
3649static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3650{
3651 int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3652
3653 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3654 if (ret)
3655 return ret;
3656
3657 rec->nr_threads = nr_cpus;
3658 pr_debug("nr_threads: %d\n", rec->nr_threads);
3659
3660 for (t = 0; t < rec->nr_threads; t++) {
3661 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3662 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3663 if (verbose > 0) {
3664 pr_debug("thread_masks[%d]: ", t);
3665 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3666 pr_debug("thread_masks[%d]: ", t);
3667 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3668 }
3669 }
3670
3671 return 0;
3672}
3673
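/*
 * Build one thread mask per (maps_spec, affinity_spec) pair. Each mask is
 * clipped to the CPUs being recorded; masks that end up empty or that overlap
 * a previously accepted mask are rejected.
 */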
3674static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3675 const char **maps_spec, const char **affinity_spec,
3676 u32 nr_spec)
3677{
3678 u32 s;
3679 int ret = 0, t = 0;
3680 struct mmap_cpu_mask cpus_mask;
3681 struct thread_mask thread_mask, full_mask, *thread_masks;
3682
3683 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3684 if (ret) {
3685 pr_err("Failed to allocate CPUs mask\n");
3686 return ret;
3687 }
3688
3689 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3690 if (ret) {
3691 pr_err("Failed to init cpu mask\n");
3692 goto out_free_cpu_mask;
3693 }
3694
3695 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3696 if (ret) {
3697 pr_err("Failed to allocate full mask\n");
3698 goto out_free_cpu_mask;
3699 }
3700
3701 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3702 if (ret) {
3703 pr_err("Failed to allocate thread mask\n");
3704 goto out_free_full_and_cpu_masks;
3705 }
3706
3707 for (s = 0; s < nr_spec; s++) {
3708 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3709 if (ret) {
3710 pr_err("Failed to initialize maps thread mask\n");
3711 goto out_free;
3712 }
3713 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3714 if (ret) {
3715 pr_err("Failed to initialize affinity thread mask\n");
3716 goto out_free;
3717 }
3718
3719 /* ignore invalid CPUs but do not allow empty masks */
3720 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3721 cpus_mask.bits, thread_mask.maps.nbits)) {
3722 pr_err("Empty maps mask: %s\n", maps_spec[s]);
3723 ret = -EINVAL;
3724 goto out_free;
3725 }
3726 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3727 cpus_mask.bits, thread_mask.affinity.nbits)) {
3728 pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3729 ret = -EINVAL;
3730 goto out_free;
3731 }
3732
3733 /* do not allow intersection with other masks (full_mask) */
3734 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3735 thread_mask.maps.nbits)) {
3736 pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3737 ret = -EINVAL;
3738 goto out_free;
3739 }
3740 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3741 thread_mask.affinity.nbits)) {
3742 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3743 ret = -EINVAL;
3744 goto out_free;
3745 }
3746
3747 bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3748 thread_mask.maps.bits, full_mask.maps.nbits);
3749 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3750 thread_mask.affinity.bits, full_mask.maps.nbits);
3751
3752 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3753 if (!thread_masks) {
3754 pr_err("Failed to reallocate thread masks\n");
3755 ret = -ENOMEM;
3756 goto out_free;
3757 }
3758 rec->thread_masks = thread_masks;
3759 rec->thread_masks[t] = thread_mask;
3760 if (verbose > 0) {
3761 pr_debug("thread_masks[%d]: ", t);
3762 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3763 pr_debug("thread_masks[%d]: ", t);
3764 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3765 }
3766 t++;
3767 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3768 if (ret) {
3769 pr_err("Failed to allocate thread mask\n");
3770 goto out_free_full_and_cpu_masks;
3771 }
3772 }
3773 rec->nr_threads = t;
3774 pr_debug("nr_threads: %d\n", rec->nr_threads);
3775 if (!rec->nr_threads)
3776 ret = -EINVAL;
3777
3778out_free:
3779 record__thread_mask_free(&thread_mask);
3780out_free_full_and_cpu_masks:
3781 record__thread_mask_free(&full_mask);
3782out_free_cpu_mask:
3783 record__mmap_cpu_mask_free(&cpus_mask);
3784
3785 return ret;
3786}
3787
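/*
 * One data streaming thread per core: reuse the core CPU lists from the
 * CPU topology as both the maps and the affinity specs.
 */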
3788static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3789{
3790 int ret;
3791 struct cpu_topology *topo;
3792
3793 topo = cpu_topology__new();
3794 if (!topo) {
3795 pr_err("Failed to allocate CPU topology\n");
3796 return -ENOMEM;
3797 }
3798
3799 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3800 topo->core_cpus_list, topo->core_cpus_lists);
3801 cpu_topology__delete(topo);
3802
3803 return ret;
3804}
3805
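/* Same as above, but with one data streaming thread per processor package. */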
3806static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3807{
3808 int ret;
3809 struct cpu_topology *topo;
3810
3811 topo = cpu_topology__new();
3812 if (!topo) {
3813 pr_err("Failed to allocate CPU topology\n");
3814 return -ENOMEM;
3815 }
3816
3817 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3818 topo->package_cpus_list, topo->package_cpus_lists);
3819 cpu_topology__delete(topo);
3820
3821 return ret;
3822}
3823
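/*
 * One data streaming thread per NUMA node: build a temporary spec array from
 * the per-node CPU lists and hand it to record__init_thread_masks_spec().
 */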
3824static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3825{
3826 u32 s;
3827 int ret;
3828 const char **spec;
3829 struct numa_topology *topo;
3830
3831 topo = numa_topology__new();
3832 if (!topo) {
3833 pr_err("Failed to allocate NUMA topology\n");
3834 return -ENOMEM;
3835 }
3836
3837 spec = zalloc(topo->nr * sizeof(char *));
3838 if (!spec) {
3839 pr_err("Failed to allocate NUMA spec\n");
3840 ret = -ENOMEM;
3841 goto out_delete_topo;
3842 }
3843 for (s = 0; s < topo->nr; s++)
3844 spec[s] = topo->nodes[s].cpus;
3845
3846 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3847
3848 zfree(&spec);
3849
3850out_delete_topo:
3851 numa_topology__delete(topo);
3852
3853 return ret;
3854}
3855
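/*
 * Parse the user supplied --threads spec: entries are separated by ':' and
 * each entry is "<maps cpus>/<affinity cpus>". For example (illustrative),
 * a spec like "0-3/3:4-7/7" would request two threads reading the mmaps of
 * CPUs 0-3 and 4-7 while pinned to CPUs 3 and 7 respectively.
 */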
3856static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3857{
3858 int t, ret;
3859 u32 s, nr_spec = 0;
3860 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3861 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3862
3863 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3864 spec = strtok_r(user_spec, ":", &spec_ptr);
3865 if (spec == NULL)
3866 break;
3867 pr_debug2("threads_spec[%d]: %s\n", t, spec);
3868 mask = strtok_r(spec, "/", &mask_ptr);
3869 if (mask == NULL)
3870 break;
3871 pr_debug2(" maps mask: %s\n", mask);
3872 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3873 if (!tmp_spec) {
3874 pr_err("Failed to reallocate maps spec\n");
3875 ret = -ENOMEM;
3876 goto out_free;
3877 }
3878 maps_spec = tmp_spec;
3879 maps_spec[nr_spec] = dup_mask = strdup(mask);
3880 if (!maps_spec[nr_spec]) {
3881 pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3882 ret = -ENOMEM;
3883 goto out_free;
3884 }
3885 mask = strtok_r(NULL, "/", &mask_ptr);
3886 if (mask == NULL) {
3887 pr_err("Invalid thread maps or affinity specs\n");
3888 ret = -EINVAL;
3889 goto out_free;
3890 }
3891 pr_debug2(" affinity mask: %s\n", mask);
3892 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3893 if (!tmp_spec) {
3894 pr_err("Failed to reallocate affinity spec\n");
3895 ret = -ENOMEM;
3896 goto out_free;
3897 }
3898 affinity_spec = tmp_spec;
3899 affinity_spec[nr_spec] = strdup(mask);
3900 if (!affinity_spec[nr_spec]) {
3901 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3902 ret = -ENOMEM;
3903 goto out_free;
3904 }
3905 dup_mask = NULL;
3906 nr_spec++;
3907 }
3908
3909 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3910 (const char **)affinity_spec, nr_spec);
3911
3912out_free:
3913 free(dup_mask);
3914 for (s = 0; s < nr_spec; s++) {
3915 if (maps_spec)
3916 free(maps_spec[s]);
3917 if (affinity_spec)
3918 free(affinity_spec[s]);
3919 }
3920 free(affinity_spec);
3921 free(maps_spec);
3922
3923 return ret;
3924}
3925
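/*
 * Default (non-parallel) mode: a single data streaming thread whose maps
 * mask covers all CPUs of the evlist; the affinity mask is left as
 * allocated (all zeroes), i.e. the thread is not pinned anywhere.
 */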
3926static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3927{
3928 int ret;
3929
3930 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3931 if (ret)
3932 return ret;
3933
3934 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
3935 return -ENODEV;
3936
3937 rec->nr_threads = 1;
3938
3939 return 0;
3940}
3941
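/*
 * Dispatch on the requested --threads spec (cpu, core, package, numa or a
 * user defined list). Without parallel streaming a single default thread
 * mask is used, and --per-thread evlists are rejected as incompatible.
 */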
3942static int record__init_thread_masks(struct record *rec)
3943{
3944 int ret = 0;
3945 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3946
3947 if (!record__threads_enabled(rec))
3948 return record__init_thread_default_masks(rec, cpus);
3949
3950 if (evlist__per_thread(rec->evlist)) {
3951 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3952 return -EINVAL;
3953 }
3954
3955 switch (rec->opts.threads_spec) {
3956 case THREAD_SPEC__CPU:
3957 ret = record__init_thread_cpu_masks(rec, cpus);
3958 break;
3959 case THREAD_SPEC__CORE:
3960 ret = record__init_thread_core_masks(rec, cpus);
3961 break;
3962 case THREAD_SPEC__PACKAGE:
3963 ret = record__init_thread_package_masks(rec, cpus);
3964 break;
3965 case THREAD_SPEC__NUMA:
3966 ret = record__init_thread_numa_masks(rec, cpus);
3967 break;
3968 case THREAD_SPEC__USER:
3969 ret = record__init_thread_user_masks(rec, cpus);
3970 break;
3971 default:
3972 break;
3973 }
3974
3975 return ret;
3976}
3977
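/*
 * Entry point of 'perf record': parse options, validate the target and the
 * event list, configure auxtrace/off-cpu/text-poke recording and the data
 * streaming thread masks, then hand over to __cmd_record().
 */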
3978int cmd_record(int argc, const char **argv)
3979{
3980 int err;
3981 struct record *rec = &record;
3982 char errbuf[BUFSIZ];
3983
3984 setlocale(LC_ALL, "");
3985
3986#ifndef HAVE_BPF_SKEL
3987# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3988 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3989# undef set_nobuild
3990#endif
3991
3992 rec->opts.affinity = PERF_AFFINITY_SYS;
3993
3994 rec->evlist = evlist__new();
3995 if (rec->evlist == NULL)
3996 return -ENOMEM;
3997
3998 err = perf_config(perf_record_config, rec);
3999 if (err)
4000 return err;
4001
4002 argc = parse_options(argc, argv, record_options, record_usage,
4003 PARSE_OPT_STOP_AT_NON_OPTION);
4004 if (quiet)
4005 perf_quiet_option();
4006
4007 err = symbol__validate_sym_arguments();
4008 if (err)
4009 return err;
4010
4011 perf_debuginfod_setup(&record.debuginfod);
4012
4013 /* Make system wide (-a) the default target. */
4014 if (!argc && target__none(&rec->opts.target))
4015 rec->opts.target.system_wide = true;
4016
4017 if (nr_cgroups && !rec->opts.target.system_wide) {
4018 usage_with_options_msg(record_usage, record_options,
4019 "cgroup monitoring only available in system-wide mode");
4020
4021 }
4022
4023 if (rec->buildid_mmap) {
4024 if (!perf_can_record_build_id()) {
4025 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
4026 err = -EINVAL;
4027 goto out_opts;
4028 }
4029 pr_debug("Enabling build id in mmap2 events.\n");
4030 /* Enable mmap build id synthesizing. */
4031 symbol_conf.buildid_mmap2 = true;
4032 /* Enable perf_event_attr::build_id bit. */
4033 rec->opts.build_id = true;
4034 /* Disable build id cache. */
4035 rec->no_buildid = true;
4036 }
4037
4038 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4039 pr_err("Kernel has no cgroup sampling support.\n");
4040 err = -EINVAL;
4041 goto out_opts;
4042 }
4043
4044 if (rec->opts.kcore)
4045 rec->opts.text_poke = true;
4046
4047 if (rec->opts.kcore || record__threads_enabled(rec))
4048 rec->data.is_dir = true;
4049
4050 if (record__threads_enabled(rec)) {
4051 if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4052 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4053 goto out_opts;
4054 }
4055 if (record__aio_enabled(rec)) {
4056 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4057 goto out_opts;
4058 }
4059 }
4060
4061 if (rec->opts.comp_level != 0) {
4062 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4063 rec->no_buildid = true;
4064 }
4065
4066 if (rec->opts.record_switch_events &&
4067 !perf_can_record_switch_events()) {
4068 ui__error("kernel does not support recording context switch events\n");
4069 parse_options_usage(record_usage, record_options, "switch-events", 0);
4070 err = -EINVAL;
4071 goto out_opts;
4072 }
4073
4074 if (switch_output_setup(rec)) {
4075 parse_options_usage(record_usage, record_options, "switch-output", 0);
4076 err = -EINVAL;
4077 goto out_opts;
4078 }
4079
4080 if (rec->switch_output.time) {
4081 signal(SIGALRM, alarm_sig_handler);
4082 alarm(rec->switch_output.time);
4083 }
4084
4085 if (rec->switch_output.num_files) {
4086 rec->switch_output.filenames = calloc(sizeof(char *),
4087 rec->switch_output.num_files);
4088 if (!rec->switch_output.filenames) {
4089 err = -EINVAL;
4090 goto out_opts;
4091 }
4092 }
4093
4094 if (rec->timestamp_filename && record__threads_enabled(rec)) {
4095 rec->timestamp_filename = false;
4096 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4097 }
4098
4099 /*
4100 * Allow aliases to facilitate the lookup of symbols for address
4101 * filters. Refer to auxtrace_parse_filters().
4102 */
4103 symbol_conf.allow_aliases = true;
4104
4105 symbol__init(NULL);
4106
4107 err = record__auxtrace_init(rec);
4108 if (err)
4109 goto out;
4110
4111 if (dry_run)
4112 goto out;
4113
4114 err = -ENOMEM;
4115
4116 if (rec->no_buildid_cache || rec->no_buildid) {
4117 disable_buildid_cache();
4118 } else if (rec->switch_output.enabled) {
4119 /*
4120 * In 'perf record --switch-output', disable buildid
4121 * generation by default to reduce data file switching
4122 * overhead. Still generate buildids if they are explicitly
4123 * required using
4124 *
4125 * perf record --switch-output --no-no-buildid \
4126 * --no-no-buildid-cache
4127 *
4128 * The following code is equivalent to:
4129 *
4130 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4131 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4132 * disable_buildid_cache();
4133 */
4134 bool disable = true;
4135
4136 if (rec->no_buildid_set && !rec->no_buildid)
4137 disable = false;
4138 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4139 disable = false;
4140 if (disable) {
4141 rec->no_buildid = true;
4142 rec->no_buildid_cache = true;
4143 disable_buildid_cache();
4144 }
4145 }
4146
4147 if (record.opts.overwrite)
4148 record.opts.tail_synthesize = true;
4149
4150 if (rec->evlist->core.nr_entries == 0) {
4151 bool can_profile_kernel = perf_event_paranoid_check(1);
4152
4153 err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
4154 if (err)
4155 goto out;
4156 }
4157
4158 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4159 rec->opts.no_inherit = true;
4160
4161 err = target__validate(&rec->opts.target);
4162 if (err) {
4163 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4164 ui__warning("%s\n", errbuf);
4165 }
4166
4167 err = target__parse_uid(&rec->opts.target);
4168 if (err) {
4169 int saved_errno = errno;
4170
4171 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4172 ui__error("%s", errbuf);
4173
4174 err = -saved_errno;
4175 goto out;
4176 }
4177
4178 /* Enable ignoring missing threads when -u/-p option is defined. */
4179 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
4180
4181 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4182
4183 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4184 arch__add_leaf_frame_record_opts(&rec->opts);
4185
4186 err = -ENOMEM;
4187 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4188 if (rec->opts.target.pid != NULL) {
4189 pr_err("Couldn't create thread/CPU maps: %s\n",
4190 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4191 goto out;
4192 }
4193 else
4194 usage_with_options(record_usage, record_options);
4195 }
4196
4197 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4198 if (err)
4199 goto out;
4200
4201 /*
4202 * We take all buildids when the file contains
4203 * AUX area tracing data because we do not decode the
4204 * trace, as doing so would take too long.
4205 */
4206 if (rec->opts.full_auxtrace)
4207 rec->buildid_all = true;
4208
4209 if (rec->opts.text_poke) {
4210 err = record__config_text_poke(rec->evlist);
4211 if (err) {
4212 pr_err("record__config_text_poke failed, error %d\n", err);
4213 goto out;
4214 }
4215 }
4216
4217 if (rec->off_cpu) {
4218 err = record__config_off_cpu(rec);
4219 if (err) {
4220 pr_err("record__config_off_cpu failed, error %d\n", err);
4221 goto out;
4222 }
4223 }
4224
4225 if (record_opts__config(&rec->opts)) {
4226 err = -EINVAL;
4227 goto out;
4228 }
4229
4230 err = record__config_tracking_events(rec);
4231 if (err) {
4232 pr_err("record__config_tracking_events failed, error %d\n", err);
4233 goto out;
4234 }
4235
4236 err = record__init_thread_masks(rec);
4237 if (err) {
4238 pr_err("Failed to initialize parallel data streaming masks\n");
4239 goto out;
4240 }
4241
4242 if (rec->opts.nr_cblocks > nr_cblocks_max)
4243 rec->opts.nr_cblocks = nr_cblocks_max;
4244 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4245
4246 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4247 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4248
4249 if (rec->opts.comp_level > comp_level_max)
4250 rec->opts.comp_level = comp_level_max;
4251 pr_debug("comp level: %d\n", rec->opts.comp_level);
4252
4253 err = __cmd_record(&record, argc, argv);
4254out:
4255 evlist__delete(rec->evlist);
4256 symbol__exit();
4257 auxtrace_record__free(rec->itr);
4258out_opts:
4259 record__free_thread_masks(rec, rec->nr_threads);
4260 rec->nr_threads = 0;
4261 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4262 return err;
4263}
4264
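/*
 * Handler for the snapshot signal: kick an AUX area tracing snapshot and,
 * when --switch-output is signal driven, request a data file switch.
 */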
4265static void snapshot_sig_handler(int sig __maybe_unused)
4266{
4267 struct record *rec = &record;
4268
4269 hit_auxtrace_snapshot_trigger(rec);
4270
4271 if (switch_output_signal(rec))
4272 trigger_hit(&switch_output_trigger);
4273}
4274
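/*
 * SIGALRM handler armed via alarm() in cmd_record() for time based
 * --switch-output: request a data file switch when the interval expires.
 */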
4275static void alarm_sig_handler(int sig __maybe_unused)
4276{
4277 struct record *rec = &record;
4278
4279 if (switch_output_time(rec))
4280 trigger_hit(&switch_output_trigger);
4281}
4282
